mirror of
https://github.com/uutils/coreutils
synced 2025-01-25 03:15:35 +00:00
uucore: start work on a completely new printf implementation
This commit is contained in:
parent
f37318d37c
commit
a3e68d5bbd
23 changed files with 672 additions and 2714 deletions
|
@ -76,7 +76,7 @@ entries = ["libc"]
|
|||
fs = ["libc", "winapi-util", "windows-sys"]
|
||||
fsext = ["libc", "time", "windows-sys"]
|
||||
lines = []
|
||||
memo = ["itertools"]
|
||||
format = ["itertools"]
|
||||
mode = ["libc"]
|
||||
perms = ["libc", "walkdir"]
|
||||
process = ["libc"]
|
||||
|
|
|
@ -8,14 +8,12 @@ pub mod fs;
|
|||
pub mod fsext;
|
||||
#[cfg(feature = "lines")]
|
||||
pub mod lines;
|
||||
#[cfg(feature = "memo")]
|
||||
pub mod memo;
|
||||
#[cfg(feature = "format")]
|
||||
pub mod format;
|
||||
#[cfg(feature = "ringbuffer")]
|
||||
pub mod ringbuffer;
|
||||
#[cfg(feature = "sum")]
|
||||
pub mod sum;
|
||||
#[cfg(feature = "memo")]
|
||||
mod tokenize;
|
||||
|
||||
// * (platform-specific) feature-gated modules
|
||||
// ** non-windows (i.e. Unix + Fuchsia)
|
||||
|
|
144
src/uucore/src/lib/features/format/mod.rs
Normal file
144
src/uucore/src/lib/features/format/mod.rs
Normal file
|
@ -0,0 +1,144 @@
|
|||
//! Main entry point for our implementation of printf.
|
||||
//!
|
||||
//! The [`printf`] and [`sprintf`] closely match the behavior of the
|
||||
//! corresponding C functions: the former renders a formatted string
|
||||
//! to stdout, the latter renders to a new byte vector (`Vec<u8>`).
|
||||
//!
|
||||
//! In addition to the [`printf`] and [`sprintf`] functions, we expose the
|
||||
//! [`Format`] struct, which represents a parsed format string. This reduces
|
||||
//! the need for parsing a format string multiple times and assures that no
|
||||
//! parsing errors occur during writing.
|
||||
// spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety
|
||||
|
||||
// mod num_format;
|
||||
mod spec;
|
||||
|
||||
use spec::Spec;
|
||||
use std::io::{stdout, Write};
|
||||
|
||||
pub enum FormatError {
|
||||
SpecError,
|
||||
IoError(std::io::Error),
|
||||
NoMoreArguments,
|
||||
InvalidArgument(FormatArgument),
|
||||
}
|
||||
|
||||
/// A single item to format
|
||||
enum FormatItem {
|
||||
/// A format specifier
|
||||
Spec(Spec),
|
||||
/// Some plain text
|
||||
Text(Vec<u8>),
|
||||
/// A single character
|
||||
///
|
||||
/// Added in addition to `Text` as an optimization.
|
||||
Char(u8),
|
||||
}
|
||||
|
||||
/// A single, already typed argument that a format directive can consume.
///
/// `Debug`, `Clone` and `PartialEq` are derived so that arguments (and the
/// errors that carry them) can be printed, duplicated and compared by callers
/// and tests.
#[derive(Debug, Clone, PartialEq)]
pub enum FormatArgument {
    /// A character, consumed by `%c`.
    Char(char),
    /// A string, consumed by `%s`.
    String(String),
    /// An unsigned integer; also the type required for `*` width/precision.
    UnsignedInt(u64),
    /// A signed integer, consumed by `%d` / `%i`.
    SignedInt(i64),
    /// A floating point number, consumed by the float directives.
    Float(f64),
}
|
||||
|
||||
impl FormatItem {
|
||||
fn write<'a>(&self, mut writer: impl Write, args: &mut impl Iterator<Item = FormatArgument>) -> Result<(), FormatError> {
|
||||
match self {
|
||||
FormatItem::Spec(spec) => spec.write(writer, args),
|
||||
FormatItem::Text(bytes) => writer.write_all(bytes).map_err(FormatError::IoError),
|
||||
FormatItem::Char(char) => writer.write_all(&[*char]).map_err(FormatError::IoError),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_iter(fmt: &[u8]) -> impl Iterator<Item = Result<FormatItem, FormatError>> + '_ {
|
||||
let mut rest = fmt;
|
||||
std::iter::from_fn(move || {
|
||||
if rest.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
match rest.iter().position(|c| *c == b'%') {
|
||||
None => {
|
||||
let final_text = rest;
|
||||
rest = &[];
|
||||
Some(Ok(FormatItem::Text(final_text.into())))
|
||||
}
|
||||
Some(0) => {
|
||||
// Handle the spec
|
||||
rest = &rest[1..];
|
||||
match rest.get(0) {
|
||||
None => Some(Ok(FormatItem::Char(b'%'))),
|
||||
Some(b'%') => {
|
||||
rest = &rest[1..];
|
||||
Some(Ok(FormatItem::Char(b'%')))
|
||||
}
|
||||
Some(_) => {
|
||||
let spec = match Spec::parse(&mut rest) {
|
||||
Some(spec) => spec,
|
||||
None => return Some(Err(FormatError::SpecError)),
|
||||
};
|
||||
Some(Ok(FormatItem::Spec(spec)))
|
||||
}
|
||||
}
|
||||
}
|
||||
Some(i) => {
|
||||
// The `after` slice includes the % so it will be handled correctly
|
||||
// in the next iteration.
|
||||
let (before, after) = rest.split_at(i);
|
||||
rest = after;
|
||||
return Some(Ok(FormatItem::Text(before.into())));
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// Write a formatted string to stdout.
|
||||
///
|
||||
/// `format_string` contains the template and `args` contains the
|
||||
/// arguments to render into the template.
|
||||
///
|
||||
/// See also [`sprintf`], which creates a new formatted [`String`].
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```rust
|
||||
/// use uucore::format::printf;
|
||||
///
|
||||
/// printf("hello %s", &["world".to_string()]).unwrap();
|
||||
/// // prints "hello world"
|
||||
/// ```
|
||||
pub fn printf(format_string: &[u8], arguments: impl IntoIterator<Item = FormatArgument>) -> Result<(), FormatError> {
|
||||
printf_writer(stdout(), format_string, arguments)
|
||||
}
|
||||
|
||||
fn printf_writer(mut writer: impl Write, format_string: &[u8], args: impl IntoIterator<Item = FormatArgument>) -> Result<(), FormatError> {
|
||||
let mut args = args.into_iter();
|
||||
for item in parse_iter(format_string) {
|
||||
item?.write(&mut writer, &mut args)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Create a new formatted string.
|
||||
///
|
||||
/// `format_string` contains the template and `args` contains the
|
||||
/// arguments to render into the template.
|
||||
///
|
||||
/// See also [`printf`], which prints to stdout.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```rust
|
||||
/// use uucore::format::sprintf;
|
||||
///
|
||||
/// let s = sprintf("hello %s", &["world".to_string()]).unwrap();
|
||||
/// assert_eq!(s, "hello world".to_string());
|
||||
/// ```
|
||||
pub fn sprintf(format_string: &[u8], arguments: impl IntoIterator<Item = FormatArgument>) -> Result<Vec<u8>, FormatError> {
|
||||
let mut writer = Vec::new();
|
||||
printf_writer(&mut writer, format_string, arguments)?;
|
||||
Ok(writer)
|
||||
}
|
523
src/uucore/src/lib/features/format/spec.rs
Normal file
523
src/uucore/src/lib/features/format/spec.rs
Normal file
|
@ -0,0 +1,523 @@
|
|||
// spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety
|
||||
|
||||
use super::{FormatArgument, FormatError};
|
||||
use std::{fmt::Display, io::Write};
|
||||
|
||||
pub enum Spec {
|
||||
Char {
|
||||
width: Option<CanAsterisk<usize>>,
|
||||
align_left: bool,
|
||||
},
|
||||
String {
|
||||
width: Option<CanAsterisk<usize>>,
|
||||
align_left: bool,
|
||||
},
|
||||
SignedInt {
|
||||
width: Option<CanAsterisk<usize>>,
|
||||
positive_sign: PositiveSign,
|
||||
alignment: NumberAlignment,
|
||||
},
|
||||
UnsignedInt {
|
||||
variant: UnsignedIntVariant,
|
||||
width: Option<CanAsterisk<usize>>,
|
||||
alignment: NumberAlignment,
|
||||
},
|
||||
Float {
|
||||
variant: FloatVariant,
|
||||
case: Case,
|
||||
force_decimal: ForceDecimal,
|
||||
width: Option<CanAsterisk<usize>>,
|
||||
positive_sign: PositiveSign,
|
||||
alignment: NumberAlignment,
|
||||
precision: Option<CanAsterisk<usize>>,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
pub enum UnsignedIntVariant {
|
||||
Decimal,
|
||||
Octal(Prefix),
|
||||
Hexadecimal(Case, Prefix),
|
||||
}
|
||||
|
||||
/// Notation used to render a floating point number.
#[derive(Clone, Copy)]
pub enum FloatVariant {
    /// `%f` / `%F`
    Decimal,
    /// `%e` / `%E`
    Scientific,
    /// `%g` / `%G`
    Shortest,
    /// `%a` / `%A`
    Hexadecimal,
}
|
||||
|
||||
/// Letter case of the rendered output (hex digits, exponent marker, ...).
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum Case {
    /// Selected by a lowercase conversion character.
    Lowercase,
    /// Selected by an uppercase conversion character.
    Uppercase,
}
|
||||
|
||||
/// Whether the `#` flag asked for a radix prefix on octal/hexadecimal output.
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum Prefix {
    /// `#` flag absent.
    No,
    /// `#` flag present.
    Yes,
}
|
||||
|
||||
/// Whether the `#` flag asked for a decimal point even when no digits follow.
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum ForceDecimal {
    /// `#` flag absent.
    No,
    /// `#` flag present.
    Yes,
}
|
||||
|
||||
/// What to put in front of a non-negative number.
#[derive(Clone, Copy)]
pub enum PositiveSign {
    /// Nothing (neither `+` nor space flag given).
    None,
    /// A `+` (the `+` flag; wins over the space flag).
    Plus,
    /// A space (the space flag).
    Space,
}
|
||||
|
||||
/// How a number is placed inside its field width.
#[derive(Clone, Copy)]
pub enum NumberAlignment {
    /// Left-aligned (the `-` flag; wins over `0`).
    Left,
    /// Right-aligned, padded with spaces (the default).
    RightSpace,
    /// Right-aligned, padded with zeros (the `0` flag).
    RightZero,
}
|
||||
|
||||
/// A width or precision as written in the directive: either a literal value
/// or `*`, meaning "take the value from the next argument".
#[derive(Clone, Copy)]
pub enum CanAsterisk<T> {
    /// The value was spelled out in the format string.
    Fixed(T),
    /// The value is supplied by an argument at render time.
    Asterisk,
}
|
||||
|
||||
/// Length modifier of a directive (ignored).
///
/// The modifier is parsed so that it is skipped correctly, but its value does
/// not influence rendering. It could be used in the future if the need arises.
enum Length {
    /// `hh`: signed/unsigned char
    Char,
    /// `h`: signed/unsigned short int
    Short,
    /// `l`: signed/unsigned long int
    Long,
    /// `ll`: signed/unsigned long long int
    LongLong,
    /// `j`: intmax_t
    IntMaxT,
    /// `z`: size_t
    SizeT,
    /// `t`: ptrdiff_t (variant name kept as spelled; the parser refers to it)
    PtfDiffT,
    /// `L`: long double
    LongDouble,
}
|
||||
|
||||
impl Spec {
|
||||
pub fn parse(rest: &mut &[u8]) -> Option<Self> {
|
||||
// Based on the C++ reference, the spec format looks like:
|
||||
//
|
||||
// %[flags][width][.precision][length]specifier
|
||||
//
|
||||
// However, we have already parsed the '%'.
|
||||
|
||||
let mut minus = false;
|
||||
let mut plus = false;
|
||||
let mut space = false;
|
||||
let mut hash = false;
|
||||
let mut zero = false;
|
||||
|
||||
while let Some(x @ (b'-' | b'+' | b' ' | b'#' | b'0')) = rest.get(0) {
|
||||
match x {
|
||||
b'-' => minus = true,
|
||||
b'+' => plus = true,
|
||||
b' ' => space = true,
|
||||
b'#' => hash = true,
|
||||
b'0' => zero = true,
|
||||
_ => unreachable!(),
|
||||
}
|
||||
*rest = &rest[1..]
|
||||
}
|
||||
|
||||
let width = eat_asterisk_or_number(rest);
|
||||
|
||||
let precision = if let Some(b'.') = rest.get(0) {
|
||||
Some(eat_asterisk_or_number(rest).unwrap_or(CanAsterisk::Fixed(0)))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let length = rest.get(0).and_then(|c| {
|
||||
Some(match c {
|
||||
b'h' => {
|
||||
if let Some(b'h') = rest.get(1) {
|
||||
*rest = &rest[1..];
|
||||
Length::Char
|
||||
} else {
|
||||
Length::Short
|
||||
}
|
||||
}
|
||||
b'l' => {
|
||||
if let Some(b'l') = rest.get(1) {
|
||||
*rest = &rest[1..];
|
||||
Length::Long
|
||||
} else {
|
||||
Length::LongLong
|
||||
}
|
||||
}
|
||||
b'j' => Length::IntMaxT,
|
||||
b'z' => Length::SizeT,
|
||||
b't' => Length::PtfDiffT,
|
||||
b'L' => Length::LongDouble,
|
||||
_ => return None,
|
||||
})
|
||||
});
|
||||
|
||||
if length.is_some() {
|
||||
*rest = &rest[1..];
|
||||
}
|
||||
|
||||
Some(match rest.get(0)? {
|
||||
b'c' => Spec::Char {
|
||||
width,
|
||||
align_left: minus,
|
||||
},
|
||||
b's' => Spec::String {
|
||||
width,
|
||||
align_left: minus,
|
||||
},
|
||||
b'd' | b'i' => Spec::SignedInt {
|
||||
width,
|
||||
alignment: match (minus, zero) {
|
||||
(true, _) => NumberAlignment::Left,
|
||||
(false, true) => NumberAlignment::RightZero,
|
||||
(false, false) => NumberAlignment::RightSpace,
|
||||
},
|
||||
positive_sign: match (plus, space) {
|
||||
(true, _) => PositiveSign::Plus,
|
||||
(false, true) => PositiveSign::Space,
|
||||
(false, false) => PositiveSign::None,
|
||||
},
|
||||
},
|
||||
c @ (b'u' | b'o' | b'x' | b'X') => {
|
||||
let prefix = match hash {
|
||||
false => Prefix::No,
|
||||
true => Prefix::Yes,
|
||||
};
|
||||
let alignment = match (minus, zero) {
|
||||
(true, _) => NumberAlignment::Left,
|
||||
(false, true) => NumberAlignment::RightZero,
|
||||
(false, false) => NumberAlignment::RightSpace,
|
||||
};
|
||||
let variant = match c {
|
||||
b'u' => UnsignedIntVariant::Decimal,
|
||||
b'o' => UnsignedIntVariant::Octal(prefix),
|
||||
b'x' => UnsignedIntVariant::Hexadecimal(Case::Lowercase, prefix),
|
||||
b'X' => UnsignedIntVariant::Hexadecimal(Case::Uppercase, prefix),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
Spec::UnsignedInt {
|
||||
variant,
|
||||
width,
|
||||
alignment,
|
||||
}
|
||||
}
|
||||
c @ (b'f' | b'F' | b'e' | b'E' | b'g' | b'G' | b'a' | b'A') => Spec::Float {
|
||||
width,
|
||||
precision,
|
||||
variant: match c {
|
||||
b'f' | b'F' => FloatVariant::Decimal,
|
||||
b'e' | b'E' => FloatVariant::Scientific,
|
||||
b'g' | b'G' => FloatVariant::Shortest,
|
||||
b'a' | b'A' => FloatVariant::Hexadecimal,
|
||||
_ => unreachable!(),
|
||||
},
|
||||
force_decimal: match hash {
|
||||
false => ForceDecimal::No,
|
||||
true => ForceDecimal::Yes,
|
||||
},
|
||||
case: match c.is_ascii_uppercase() {
|
||||
false => Case::Lowercase,
|
||||
true => Case::Uppercase,
|
||||
},
|
||||
alignment: match (minus, zero) {
|
||||
(true, _) => NumberAlignment::Left,
|
||||
(false, true) => NumberAlignment::RightZero,
|
||||
(false, false) => NumberAlignment::RightSpace,
|
||||
},
|
||||
positive_sign: match (plus, space) {
|
||||
(true, _) => PositiveSign::Plus,
|
||||
(false, true) => PositiveSign::Space,
|
||||
(false, false) => PositiveSign::None,
|
||||
},
|
||||
},
|
||||
_ => return None,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn write<'a>(
|
||||
&self,
|
||||
mut writer: impl Write,
|
||||
mut args: impl Iterator<Item = FormatArgument>,
|
||||
) -> Result<(), FormatError> {
|
||||
match self {
|
||||
&Spec::Char { width, align_left } => {
|
||||
let width = resolve_asterisk(width, &mut args)?.unwrap_or(0);
|
||||
let arg = next_arg(&mut args)?;
|
||||
match arg {
|
||||
FormatArgument::Char(c) => write_padded(writer, c, width, false, align_left),
|
||||
_ => Err(FormatError::InvalidArgument(arg)),
|
||||
}
|
||||
}
|
||||
&Spec::String { width, align_left } => {
|
||||
let width = resolve_asterisk(width, &mut args)?.unwrap_or(0);
|
||||
let arg = next_arg(&mut args)?;
|
||||
match arg {
|
||||
FormatArgument::String(s) => write_padded(writer, s, width, false, align_left),
|
||||
_ => Err(FormatError::InvalidArgument(arg)),
|
||||
}
|
||||
}
|
||||
&Spec::SignedInt {
|
||||
width,
|
||||
positive_sign,
|
||||
alignment,
|
||||
} => {
|
||||
let width = resolve_asterisk(width, &mut args)?.unwrap_or(0);
|
||||
|
||||
let arg = next_arg(&mut args)?;
|
||||
let FormatArgument::SignedInt(i) = arg else {
|
||||
return Err(FormatError::InvalidArgument(arg));
|
||||
};
|
||||
|
||||
if i >= 0 {
|
||||
match positive_sign {
|
||||
PositiveSign::None => Ok(()),
|
||||
PositiveSign::Plus => write!(writer, "+"),
|
||||
PositiveSign::Space => write!(writer, " "),
|
||||
}
|
||||
.map_err(FormatError::IoError)?;
|
||||
}
|
||||
|
||||
match alignment {
|
||||
NumberAlignment::Left => write!(writer, "{i:<width$}"),
|
||||
NumberAlignment::RightSpace => write!(writer, "{i:>width$}"),
|
||||
NumberAlignment::RightZero => write!(writer, "{i:0>width$}"),
|
||||
}
|
||||
.map_err(FormatError::IoError)
|
||||
}
|
||||
&Spec::UnsignedInt {
|
||||
variant,
|
||||
width,
|
||||
alignment,
|
||||
} => {
|
||||
let width = resolve_asterisk(width, &mut args)?.unwrap_or(0);
|
||||
|
||||
let arg = next_arg(args)?;
|
||||
let FormatArgument::SignedInt(i) = arg else {
|
||||
return Err(FormatError::InvalidArgument(arg));
|
||||
};
|
||||
|
||||
let s = match variant {
|
||||
UnsignedIntVariant::Decimal => format!("{i}"),
|
||||
UnsignedIntVariant::Octal(Prefix::No) => format!("{i:o}"),
|
||||
UnsignedIntVariant::Octal(Prefix::Yes) => format!("{i:#o}"),
|
||||
UnsignedIntVariant::Hexadecimal(Case::Lowercase, Prefix::No) => {
|
||||
format!("{i:x}")
|
||||
}
|
||||
UnsignedIntVariant::Hexadecimal(Case::Lowercase, Prefix::Yes) => {
|
||||
format!("{i:#x}")
|
||||
}
|
||||
UnsignedIntVariant::Hexadecimal(Case::Uppercase, Prefix::No) => {
|
||||
format!("{i:X}")
|
||||
}
|
||||
UnsignedIntVariant::Hexadecimal(Case::Uppercase, Prefix::Yes) => {
|
||||
format!("{i:#X}")
|
||||
}
|
||||
};
|
||||
|
||||
match alignment {
|
||||
NumberAlignment::Left => write!(writer, "{s:<width$}"),
|
||||
NumberAlignment::RightSpace => write!(writer, "{s:>width$}"),
|
||||
NumberAlignment::RightZero => write!(writer, "{s:0>width$}"),
|
||||
}
|
||||
.map_err(FormatError::IoError)
|
||||
}
|
||||
&Spec::Float {
|
||||
variant,
|
||||
case,
|
||||
force_decimal,
|
||||
width,
|
||||
positive_sign,
|
||||
alignment,
|
||||
precision,
|
||||
} => {
|
||||
let width = resolve_asterisk(width, &mut args)?.unwrap_or(0);
|
||||
let precision = resolve_asterisk(precision, &mut args)?.unwrap_or(6);
|
||||
|
||||
let arg = next_arg(args)?;
|
||||
let FormatArgument::Float(f) = arg else {
|
||||
return Err(FormatError::InvalidArgument(arg));
|
||||
};
|
||||
|
||||
match positive_sign {
|
||||
PositiveSign::None => Ok(()),
|
||||
PositiveSign::Plus => write!(writer, "+"),
|
||||
PositiveSign::Space => write!(writer, " "),
|
||||
}
|
||||
.map_err(FormatError::IoError)?;
|
||||
|
||||
let s = match variant {
|
||||
FloatVariant::Decimal => format_float_decimal(f, precision, case, force_decimal),
|
||||
FloatVariant::Scientific => {
|
||||
format_float_scientific(f, precision, case, force_decimal)
|
||||
}
|
||||
FloatVariant::Shortest => format_float_shortest(f, precision, case, force_decimal),
|
||||
FloatVariant::Hexadecimal => todo!(),
|
||||
};
|
||||
|
||||
match alignment {
|
||||
NumberAlignment::Left => write!(writer, "{s:<width$}"),
|
||||
NumberAlignment::RightSpace => write!(writer, "{s:>width$}"),
|
||||
NumberAlignment::RightZero => write!(writer, "{s:0>width$}"),
|
||||
}
|
||||
.map_err(FormatError::IoError)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn format_float_decimal(
|
||||
f: f64,
|
||||
precision: usize,
|
||||
case: Case,
|
||||
force_decimal: ForceDecimal,
|
||||
) -> String {
|
||||
if !f.is_finite() {
|
||||
let mut s = format!("{f}");
|
||||
if case == Case::Lowercase {
|
||||
s.make_ascii_uppercase();
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
if precision == 0 && force_decimal == ForceDecimal::Yes {
|
||||
format!("{f:.0}.")
|
||||
} else {
|
||||
format!("{f:.*}", precision)
|
||||
}
|
||||
}
|
||||
|
||||
fn format_float_scientific(
|
||||
f: f64,
|
||||
precision: usize,
|
||||
case: Case,
|
||||
force_decimal: ForceDecimal,
|
||||
) -> String {
|
||||
// If the float is NaN, -Nan, Inf or -Inf, format like any other float
|
||||
if !f.is_finite() {
|
||||
let mut s = format!("{f}");
|
||||
if case == Case::Lowercase {
|
||||
s.make_ascii_uppercase();
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
let exponent: i32 = f.log10().floor() as i32;
|
||||
let normalized = f / 10.0_f64.powi(exponent);
|
||||
|
||||
let additional_dot = if precision == 0 && ForceDecimal::Yes == force_decimal {
|
||||
"."
|
||||
} else {
|
||||
""
|
||||
};
|
||||
|
||||
let exp_char = match case {
|
||||
Case::Lowercase => 'e',
|
||||
Case::Uppercase => 'E',
|
||||
};
|
||||
|
||||
format!(
|
||||
"{normalized:.*}{additional_dot}{exp_char}{exponent:+03}",
|
||||
precision
|
||||
)
|
||||
}
|
||||
|
||||
// TODO: This could be optimized. It's not terribly important though.
|
||||
fn format_float_shortest(
|
||||
f: f64,
|
||||
precision: usize,
|
||||
case: Case,
|
||||
force_decimal: ForceDecimal,
|
||||
) -> String {
|
||||
let a = format_float_decimal(f, precision, case, force_decimal);
|
||||
let b = format_float_scientific(f, precision, case, force_decimal);
|
||||
|
||||
if a.len() > b.len() {
|
||||
b
|
||||
} else {
|
||||
a
|
||||
}
|
||||
}
|
||||
|
||||
fn resolve_asterisk(
|
||||
option: Option<CanAsterisk<usize>>,
|
||||
args: impl Iterator<Item = FormatArgument>,
|
||||
) -> Result<Option<usize>, FormatError> {
|
||||
Ok(match option {
|
||||
None => None,
|
||||
Some(CanAsterisk::Asterisk) => {
|
||||
let arg = next_arg(args)?;
|
||||
match arg {
|
||||
FormatArgument::UnsignedInt(u) => match usize::try_from(u) {
|
||||
Ok(u) => Some(u),
|
||||
Err(_) => return Err(FormatError::InvalidArgument(arg)),
|
||||
},
|
||||
_ => return Err(FormatError::InvalidArgument(arg)),
|
||||
}
|
||||
}
|
||||
Some(CanAsterisk::Fixed(w)) => Some(w),
|
||||
})
|
||||
}
|
||||
|
||||
fn next_arg(
|
||||
mut arguments: impl Iterator<Item = FormatArgument>,
|
||||
) -> Result<FormatArgument, FormatError> {
|
||||
arguments.next().ok_or(FormatError::NoMoreArguments)
|
||||
}
|
||||
|
||||
fn write_padded(
|
||||
mut writer: impl Write,
|
||||
text: impl Display,
|
||||
width: usize,
|
||||
pad_zero: bool,
|
||||
left: bool,
|
||||
) -> Result<(), FormatError> {
|
||||
match (left, pad_zero) {
|
||||
(false, false) => write!(writer, "{text: >width$}"),
|
||||
(false, true) => write!(writer, "{text:0>width$}"),
|
||||
// 0 is ignored if we pad left.
|
||||
(true, _) => write!(writer, "{text: <width$}"),
|
||||
}
|
||||
.map_err(FormatError::IoError)
|
||||
}
|
||||
|
||||
fn eat_asterisk_or_number(rest: &mut &[u8]) -> Option<CanAsterisk<usize>> {
|
||||
if let Some(b'*') = rest.get(0) {
|
||||
*rest = &rest[1..];
|
||||
Some(CanAsterisk::Asterisk)
|
||||
} else {
|
||||
eat_number(rest).map(CanAsterisk::Fixed)
|
||||
}
|
||||
}
|
||||
|
||||
/// Consume a run of ASCII digits from the front of `rest` and parse it.
///
/// Returns `None` and leaves `rest` untouched when
/// * `rest` does not start with a digit,
/// * `rest` consists of digits only (there is no terminating byte), or
/// * the number does not fit into `usize`.
///
/// The last case used to panic; since the digits come straight from the
/// user-supplied format string it is now reported as "no number", which the
/// directive parser turns into a regular parse error.
fn eat_number(rest: &mut &[u8]) -> Option<usize> {
    match rest.iter().position(|b| !b.is_ascii_digit()) {
        None | Some(0) => None,
        Some(end) => {
            // ASCII digits are always valid UTF-8, so only `parse` can fail
            // here (on overflow).
            let digits = std::str::from_utf8(&rest[..end]).ok()?;
            let parsed = digits.parse().ok()?;
            *rest = &rest[end..];
            Some(parsed)
        }
    }
}
|
|
@ -1,175 +0,0 @@
|
|||
//! Main entry point for our implementation of printf.
|
||||
//!
|
||||
//! The [`printf`] and [`sprintf`] closely match the behavior of the
|
||||
//! corresponding C functions: the former renders a formatted string
|
||||
//! to stdout, the latter renders to a new [`String`] object.
|
||||
use crate::display::Quotable;
|
||||
use crate::error::{UResult, USimpleError};
|
||||
use crate::features::tokenize::sub::SubParser;
|
||||
use crate::features::tokenize::token::Token;
|
||||
use crate::features::tokenize::unescaped_text::UnescapedText;
|
||||
use crate::show_warning;
|
||||
use itertools::put_back_n;
|
||||
use std::io::{stdout, Cursor, Write};
|
||||
use std::iter::Peekable;
|
||||
use std::slice::Iter;
|
||||
|
||||
/// Memo runner of printf
|
||||
/// Takes a format string and arguments
|
||||
/// 1. tokenize format string into tokens, consuming
|
||||
/// any subst. arguments along the way.
|
||||
/// 2. feeds remaining arguments into function
|
||||
/// that prints tokens.
|
||||
struct Memo {
|
||||
tokens: Vec<Token>,
|
||||
}
|
||||
|
||||
fn warn_excess_args(first_arg: &str) {
|
||||
show_warning!(
|
||||
"ignoring excess arguments, starting with {}",
|
||||
first_arg.quote()
|
||||
);
|
||||
}
|
||||
|
||||
impl Memo {
|
||||
fn new<W>(
|
||||
writer: &mut W,
|
||||
pf_string: &str,
|
||||
pf_args_it: &mut Peekable<Iter<String>>,
|
||||
) -> UResult<Self>
|
||||
where
|
||||
W: Write,
|
||||
{
|
||||
let mut pm = Self { tokens: Vec::new() };
|
||||
let mut it = put_back_n(pf_string.chars());
|
||||
let mut has_sub = false;
|
||||
loop {
|
||||
if let Some(x) = UnescapedText::from_it_core(writer, &mut it, false) {
|
||||
pm.tokens.push(x);
|
||||
}
|
||||
if let Some(x) = SubParser::from_it(writer, &mut it, pf_args_it)? {
|
||||
if !has_sub {
|
||||
has_sub = true;
|
||||
}
|
||||
pm.tokens.push(x);
|
||||
}
|
||||
if let Some(x) = it.next() {
|
||||
it.put_back(x);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if !has_sub {
|
||||
let mut drain = false;
|
||||
if let Some(first_arg) = pf_args_it.peek() {
|
||||
warn_excess_args(first_arg);
|
||||
drain = true;
|
||||
}
|
||||
if drain {
|
||||
loop {
|
||||
// drain remaining args;
|
||||
if pf_args_it.next().is_none() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(pm)
|
||||
}
|
||||
fn apply<W>(&self, writer: &mut W, pf_args_it: &mut Peekable<Iter<String>>)
|
||||
where
|
||||
W: Write,
|
||||
{
|
||||
for tkn in &self.tokens {
|
||||
tkn.write(writer, pf_args_it);
|
||||
}
|
||||
}
|
||||
fn run_all<W>(writer: &mut W, pf_string: &str, pf_args: &[String]) -> UResult<()>
|
||||
where
|
||||
W: Write,
|
||||
{
|
||||
let mut arg_it = pf_args.iter().peekable();
|
||||
let pm = Self::new(writer, pf_string, &mut arg_it)?;
|
||||
loop {
|
||||
if arg_it.peek().is_none() {
|
||||
return Ok(());
|
||||
}
|
||||
pm.apply(writer, &mut arg_it);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Write a formatted string to stdout.
|
||||
///
|
||||
/// `format_string` contains the template and `args` contains the
|
||||
/// arguments to render into the template.
|
||||
///
|
||||
/// See also [`sprintf`], which creates a new formatted [`String`].
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```rust
|
||||
/// use uucore::memo::printf;
|
||||
///
|
||||
/// printf("hello %s", &["world".to_string()]).unwrap();
|
||||
/// // prints "hello world"
|
||||
/// ```
|
||||
pub fn printf(format_string: &str, args: &[String]) -> UResult<()> {
|
||||
let mut writer = stdout();
|
||||
Memo::run_all(&mut writer, format_string, args)
|
||||
}
|
||||
|
||||
/// Create a new formatted string.
|
||||
///
|
||||
/// `format_string` contains the template and `args` contains the
|
||||
/// arguments to render into the template.
|
||||
///
|
||||
/// See also [`printf`], which prints to stdout.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```rust
|
||||
/// use uucore::memo::sprintf;
|
||||
///
|
||||
/// let s = sprintf("hello %s", &["world".to_string()]).unwrap();
|
||||
/// assert_eq!(s, "hello world".to_string());
|
||||
/// ```
|
||||
pub fn sprintf(format_string: &str, args: &[String]) -> UResult<String> {
|
||||
let mut writer = Cursor::new(vec![]);
|
||||
Memo::run_all(&mut writer, format_string, args)?;
|
||||
let buf = writer.into_inner();
|
||||
match String::from_utf8(buf) {
|
||||
Ok(s) => Ok(s),
|
||||
Err(e) => Err(USimpleError::new(
|
||||
1,
|
||||
format!("failed to parse formatted string as UTF-8: {e}"),
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use crate::memo::sprintf;

    /// An empty template renders to an empty string.
    #[test]
    fn test_sprintf_smoke() {
        let rendered = sprintf("", &[]).unwrap();
        assert_eq!(rendered, "");
    }

    /// A template without directives is copied through unchanged.
    #[test]
    fn test_sprintf_no_args() {
        let rendered = sprintf("hello world", &[]).unwrap();
        assert_eq!(rendered, "hello world");
    }

    /// `%s` is replaced by the string argument.
    #[test]
    fn test_sprintf_string() {
        let args = ["world".to_string()];
        let rendered = sprintf("hello %s", &args).unwrap();
        assert_eq!(rendered, "hello world");
    }
}
|
|
@ -1,5 +0,0 @@
|
|||
#[allow(clippy::module_inception)]
|
||||
mod num_format;
|
||||
pub mod sub;
|
||||
pub mod token;
|
||||
pub mod unescaped_text;
|
|
@ -1,43 +0,0 @@
|
|||
// spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety
|
||||
|
||||
//! Primitives used by Sub Tokenizer
|
||||
//! and num_format modules
|
||||
/// Kind of field a substitution token renders.
#[derive(Clone)]
pub enum FieldType {
    Strf,
    Floatf,
    CninetyNineHexFloatf,
    Scif,
    Decf,
    Intf,
    Charf,
}
|
||||
|
||||
// #[allow(non_camel_case_types)]
|
||||
// pub enum FChar {
|
||||
// d,
|
||||
// e,
|
||||
// E,
|
||||
// i,
|
||||
// f,
|
||||
// F,
|
||||
// g,
|
||||
// G,
|
||||
// u,
|
||||
// x,
|
||||
// X,
|
||||
// o
|
||||
// }
|
||||
//
|
||||
|
||||
// a Sub Tokens' fields are stored
|
||||
// as a single object so they can be more simply
|
||||
// passed by ref to num_format in a Sub method
|
||||
#[derive(Clone)]
|
||||
pub struct FormatField<'a> {
|
||||
pub min_width: Option<isize>,
|
||||
pub second_field: Option<u32>,
|
||||
pub field_char: &'a char,
|
||||
pub field_type: &'a FieldType,
|
||||
pub orig: &'a String,
|
||||
}
|
|
@ -1,59 +0,0 @@
|
|||
//! Primitives used by num_format and sub_modules.
|
||||
//! never dealt with above (e.g. Sub Tokenizer never uses these)
|
||||
|
||||
use crate::{display::Quotable, show_error};
|
||||
use itertools::{put_back_n, PutBackN};
|
||||
use std::str::Chars;
|
||||
|
||||
use super::format_field::FormatField;
|
||||
|
||||
/// The rough ingredients of the final output for a number, organized
/// together to allow for easy generalization of output manipulation
/// (e.g. max number of digits after decimal).
#[derive(Default)]
pub struct FormatPrimitive {
    pub prefix: Option<String>,
    pub pre_decimal: Option<String>,
    pub post_decimal: Option<String>,
    pub suffix: Option<String>,
}
|
||||
|
||||
/// Numeric base; each variant's discriminant is the radix itself.
#[derive(Clone, PartialEq, Eq)]
pub enum Base {
    Ten = 10,
    Hex = 16,
    Octal = 8,
}
|
||||
|
||||
// information from the beginning of a numeric argument
|
||||
// the precedes the beginning of a numeric value
|
||||
pub struct InitialPrefix {
|
||||
pub radix_in: Base,
|
||||
pub sign: i8,
|
||||
pub offset: usize,
|
||||
}
|
||||
|
||||
pub trait Formatter {
|
||||
// return a FormatPrimitive for
|
||||
// particular field char(s), given the argument
|
||||
// string and prefix information (sign, radix)
|
||||
fn get_primitive(
|
||||
&self,
|
||||
field: &FormatField,
|
||||
in_prefix: &InitialPrefix,
|
||||
str_in: &str,
|
||||
) -> Option<FormatPrimitive>;
|
||||
// return a string from a FormatPrimitive,
|
||||
// given information about the field
|
||||
fn primitive_to_str(&self, prim: &FormatPrimitive, field: FormatField) -> String;
|
||||
}
|
||||
pub fn get_it_at(offset: usize, str_in: &str) -> PutBackN<Chars> {
|
||||
put_back_n(str_in[offset..].chars())
|
||||
}
|
||||
|
||||
// TODO: put this somewhere better
|
||||
pub fn warn_incomplete_conv(pf_arg: &str) {
|
||||
// important: keep println here not print
|
||||
show_error!("{}: value not completely converted", pf_arg.maybe_quote());
|
||||
}
|
|
@ -1,270 +0,0 @@
|
|||
// spell-checker:ignore (ToDO) arrnum arr_num mult basenum bufferval refd vals arrfloat conv intermed addl
|
||||
|
||||
/// Multiply a number, given as most-significant-first digits in base
/// `basenum`, by the small integer `base_ten_int_fact`.
///
/// The result uses the same digit representation and grows as needed to hold
/// the final carry.
pub fn arrnum_int_mult(arr_num: &[u8], basenum: u8, base_ten_int_fact: u8) -> Vec<u8> {
    let factor = u16::from(base_ten_int_fact);
    let radix = u16::from(basenum);

    // Digits are produced least-significant first and reversed at the end.
    let mut digits: Vec<u8> = Vec::new();
    let mut carry: u16 = 0;
    for &digit in arr_num.iter().rev() {
        let total = u16::from(digit) * factor + carry;
        digits.push((total % radix) as u8);
        carry = total / radix;
    }
    while carry != 0 {
        digits.push((carry % radix) as u8);
        carry /= radix;
    }

    digits.reverse();
    digits
}
|
||||
|
||||
/// State carried between successive steps of `arrnum_int_div_step`.
#[allow(dead_code)]
pub struct Remainder<'a> {
    /// Index into `arr_num` from which the next step continues.
    pub position: usize,
    /// Digits that are processed before the remaining digits of `arr_num`.
    pub replace: Vec<u8>,
    /// The digits of the number being divided.
    pub arr_num: &'a Vec<u8>,
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub struct DivOut<'a> {
|
||||
pub quotient: u8,
|
||||
pub remainder: Remainder<'a>,
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub fn arrnum_int_div_step<'a>(
|
||||
rem_in: &'a Remainder,
|
||||
radix_in: u8,
|
||||
base_ten_int_divisor: u8,
|
||||
after_decimal: bool,
|
||||
) -> DivOut<'a> {
|
||||
let mut rem_out = Remainder {
|
||||
position: rem_in.position,
|
||||
replace: Vec::new(),
|
||||
arr_num: rem_in.arr_num,
|
||||
};
|
||||
|
||||
let mut bufferval: u16 = 0;
|
||||
let base: u16 = u16::from(radix_in);
|
||||
let divisor: u16 = u16::from(base_ten_int_divisor);
|
||||
let mut traversed = 0;
|
||||
|
||||
let mut quotient = 0;
|
||||
let refd_vals = &rem_in.arr_num[rem_in.position + rem_in.replace.len()..];
|
||||
let mut it_replace = rem_in.replace.iter();
|
||||
let mut it_f = refd_vals.iter();
|
||||
loop {
|
||||
let u = match it_replace.next() {
|
||||
Some(u_rep) => u16::from(*u_rep),
|
||||
None => match it_f.next() {
|
||||
Some(u_orig) => u16::from(*u_orig),
|
||||
None => {
|
||||
if !after_decimal {
|
||||
break;
|
||||
}
|
||||
0
|
||||
}
|
||||
},
|
||||
};
|
||||
traversed += 1;
|
||||
bufferval += u;
|
||||
if bufferval > divisor {
|
||||
while bufferval >= divisor {
|
||||
quotient += 1;
|
||||
bufferval -= divisor;
|
||||
}
|
||||
rem_out.replace = if bufferval == 0 {
|
||||
Vec::new()
|
||||
} else {
|
||||
let remainder_as_arrnum = unsigned_to_arrnum(bufferval);
|
||||
base_conv_vec(&remainder_as_arrnum, 10, radix_in)
|
||||
};
|
||||
rem_out.position += 1 + (traversed - rem_out.replace.len());
|
||||
break;
|
||||
} else {
|
||||
bufferval *= base;
|
||||
}
|
||||
}
|
||||
DivOut {
|
||||
quotient,
|
||||
remainder: rem_out,
|
||||
}
|
||||
}
|
||||
/// Adds the small integer `base_ten_int_term` to a digit array.
///
/// `arrnum` holds digits in radix `basenum`, most significant first. The
/// result is a new digit array in the same radix; it grows by as many
/// leading digits as the final carry requires.
pub fn arrnum_int_add(arrnum: &[u8], basenum: u8, base_ten_int_term: u8) -> Vec<u8> {
    let radix = u16::from(basenum);
    // The term to add is treated as the initial carry into the lowest digit.
    let mut carry = u16::from(base_ten_int_term);
    let mut digits_lsb_first: Vec<u8> = Vec::new();

    for &digit in arrnum.iter().rev() {
        let sum = u16::from(digit) + carry;
        digits_lsb_first.push((sum % radix) as u8);
        carry = sum / radix;
    }
    // Whatever carry is left becomes new leading digits.
    while carry != 0 {
        digits_lsb_first.push((carry % radix) as u8);
        carry /= radix;
    }

    digits_lsb_first.reverse();
    digits_lsb_first
}
|
||||
|
||||
pub fn base_conv_vec(src: &[u8], radix_src: u8, radix_dest: u8) -> Vec<u8> {
|
||||
let mut result = vec![0];
|
||||
for i in src {
|
||||
result = arrnum_int_mult(&result, radix_dest, radix_src);
|
||||
result = arrnum_int_add(&result, radix_dest, *i);
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
/// Splits `src` into its base-ten digits, most significant first.
///
/// Zero yields an empty vector.
#[allow(dead_code)]
pub fn unsigned_to_arrnum(src: u16) -> Vec<u8> {
    // Repeatedly drop the lowest digit; stop once nothing is left.
    let mut digits: Vec<u8> = std::iter::successors(Some(src), |&rest| Some(rest / 10))
        .take_while(|&rest| rest > 0)
        .map(|rest| (rest % 10) as u8)
        .collect();
    digits.reverse();
    digits
}
|
||||
|
||||
// temporary needs-improvement-function
/// Interprets `src` as the digits after a radix point in radix `radix_src`
/// and returns their value as an `f64`.
///
/// Only the first 16 digits contribute. `_radix_dest` is ignored: a real
/// implementation for arbitrary string input would need much more code, so
/// this serves as an outline of how it would work.
pub fn base_conv_float(src: &[u8], radix_src: u8, _radix_dest: u8) -> f64 {
    let radix = f64::from(radix_src);
    let mut place_value = 1_f64;
    let mut total = 0_f64;
    for &digit in src.iter().take(16) {
        place_value /= radix;
        total += place_value * f64::from(digit);
    }
    total
}
|
||||
|
||||
pub fn str_to_arrnum(src: &str, radix_def_src: &dyn RadixDef) -> Vec<u8> {
|
||||
let mut intermed_in: Vec<u8> = Vec::new();
|
||||
for c in src.chars() {
|
||||
#[allow(clippy::single_match)]
|
||||
match radix_def_src.parse_char(c) {
|
||||
Some(u) => {
|
||||
intermed_in.push(u);
|
||||
}
|
||||
None => {} //todo err msg on incorrect
|
||||
}
|
||||
}
|
||||
intermed_in
|
||||
}
|
||||
|
||||
pub fn arrnum_to_str(src: &[u8], radix_def_dest: &dyn RadixDef) -> String {
|
||||
let mut str_out = String::new();
|
||||
for u in src.iter() {
|
||||
#[allow(clippy::single_match)]
|
||||
match radix_def_dest.format_u8(*u) {
|
||||
Some(c) => {
|
||||
str_out.push(c);
|
||||
}
|
||||
None => {} //todo
|
||||
}
|
||||
}
|
||||
str_out
|
||||
}
|
||||
|
||||
pub fn base_conv_str(
|
||||
src: &str,
|
||||
radix_def_src: &dyn RadixDef,
|
||||
radix_def_dest: &dyn RadixDef,
|
||||
) -> String {
|
||||
let intermed_in: Vec<u8> = str_to_arrnum(src, radix_def_src);
|
||||
let intermed_out = base_conv_vec(
|
||||
&intermed_in,
|
||||
radix_def_src.get_max(),
|
||||
radix_def_dest.get_max(),
|
||||
);
|
||||
arrnum_to_str(&intermed_out, radix_def_dest)
|
||||
}
|
||||
|
||||
/// Describes a positional number system: its radix and how single digits
/// map to and from characters.
pub trait RadixDef {
    /// Returns the radix (the number of distinct digit values).
    fn get_max(&self) -> u8;
    /// Returns the digit value of `x`, or `None` if `x` is not a digit.
    fn parse_char(&self, x: char) -> Option<u8>;
    /// Returns the character for digit value `x`, or `None` if out of range.
    fn format_u8(&self, x: u8) -> Option<char>;
}
|
||||
pub struct RadixTen;
|
||||
|
||||
const ZERO_ASC: u8 = b'0';
|
||||
const UPPER_A_ASC: u8 = b'A';
|
||||
const LOWER_A_ASC: u8 = b'a';
|
||||
|
||||
impl RadixDef for RadixTen {
|
||||
fn get_max(&self) -> u8 {
|
||||
10
|
||||
}
|
||||
fn parse_char(&self, c: char) -> Option<u8> {
|
||||
match c {
|
||||
'0'..='9' => Some(c as u8 - ZERO_ASC),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
fn format_u8(&self, u: u8) -> Option<char> {
|
||||
match u {
|
||||
0..=9 => Some((ZERO_ASC + u) as char),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
pub struct RadixHex;
|
||||
impl RadixDef for RadixHex {
|
||||
fn get_max(&self) -> u8 {
|
||||
16
|
||||
}
|
||||
fn parse_char(&self, c: char) -> Option<u8> {
|
||||
match c {
|
||||
'0'..='9' => Some(c as u8 - ZERO_ASC),
|
||||
'A'..='F' => Some(c as u8 + 10 - UPPER_A_ASC),
|
||||
'a'..='f' => Some(c as u8 + 10 - LOWER_A_ASC),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
fn format_u8(&self, u: u8) -> Option<char> {
|
||||
match u {
|
||||
0..=9 => Some((ZERO_ASC + u) as char),
|
||||
10..=15 => Some((UPPER_A_ASC + (u - 10)) as char),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
mod tests;
|
|
@ -1,56 +0,0 @@
|
|||
// spell-checker:ignore (ToDO) arrnum mult
|
||||
|
||||
#[cfg(test)]
|
||||
use super::*;
|
||||
|
||||
#[test]
fn test_arrnum_int_mult() {
    // (in base 10) 12 * 4 = 48
    let factor: Vec<u8> = vec![1, 2];
    let base_num = 10;
    let base_ten_int_fact: u8 = 4;
    let should_output: Vec<u8> = vec![4, 8];

    let product = arrnum_int_mult(&factor, base_num, base_ten_int_fact);
    // assert_eq! prints both digit arrays when they differ.
    assert_eq!(product, should_output);
}
|
||||
|
||||
#[test]
fn test_arrnum_int_non_base_10() {
    // (in base 3)
    // 5 * 4 = 20
    let factor: Vec<u8> = vec![1, 2];
    let base_num = 3;
    let base_ten_int_fact: u8 = 4;
    let should_output: Vec<u8> = vec![2, 0, 2];

    let product = arrnum_int_mult(&factor, base_num, base_ten_int_fact);
    // assert_eq! prints both digit arrays when they differ.
    assert_eq!(product, should_output);
}
|
||||
|
||||
#[test]
fn test_arrnum_int_div_short_circuit() {
    // (in base 10) one division step of 1350 by 41
    let arrnum: Vec<u8> = vec![5, 5, 5, 5, 0];
    let base_num = 10;
    let base_ten_int_divisor: u8 = 41;
    let remainder_passed_in = Remainder {
        position: 1,
        replace: vec![1, 3],
        arr_num: &arrnum,
    };

    // the "replace" should mean the number being divided
    // is 1350, the first time you can get 41 to go into
    // 1350, its at 135, where you can get a quotient of
    // 3 and a remainder of 12;

    let quotient_should_be: u8 = 3;
    let remainder_position_should_be: usize = 3;
    let remainder_replace_should_be = vec![1, 2];

    let result = arrnum_int_div_step(&remainder_passed_in, base_num, base_ten_int_divisor, false);
    // assert_eq! reports the actual value alongside the expected one.
    assert_eq!(quotient_should_be, result.quotient);
    assert_eq!(remainder_position_should_be, result.remainder.position);
    assert_eq!(remainder_replace_should_be, result.remainder.replace);
}
|
|
@ -1,115 +0,0 @@
|
|||
// spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety
|
||||
// spell-checker:ignore (ToDO) arrnum
|
||||
|
||||
//! formatter for %a %F C99 Hex-floating-point subs
|
||||
use super::super::format_field::FormatField;
|
||||
use super::super::formatter::{FormatPrimitive, Formatter, InitialPrefix};
|
||||
use super::base_conv;
|
||||
use super::base_conv::RadixDef;
|
||||
use super::float_common::{primitive_to_str_common, FloatAnalysis};
|
||||
|
||||
/// Formatter for the C99 hexadecimal floating-point substitutions.
#[derive(Default)]
pub struct CninetyNineHexFloatf {
    #[allow(dead_code)]
    as_num: f64,
}

impl CninetyNineHexFloatf {
    /// Creates a formatter whose `as_num` is zero (same as `Default`).
    pub fn new() -> Self {
        Self { as_num: 0.0 }
    }
}
|
||||
|
||||
impl Formatter for CninetyNineHexFloatf {
|
||||
fn get_primitive(
|
||||
&self,
|
||||
field: &FormatField,
|
||||
initial_prefix: &InitialPrefix,
|
||||
str_in: &str,
|
||||
) -> Option<FormatPrimitive> {
|
||||
let second_field = field.second_field.unwrap_or(6) + 1;
|
||||
let analysis = FloatAnalysis::analyze(
|
||||
str_in,
|
||||
initial_prefix,
|
||||
Some(second_field as usize),
|
||||
None,
|
||||
true,
|
||||
);
|
||||
let f = get_primitive_hex(
|
||||
initial_prefix,
|
||||
&str_in[initial_prefix.offset..],
|
||||
&analysis,
|
||||
second_field as usize,
|
||||
*field.field_char == 'A',
|
||||
);
|
||||
Some(f)
|
||||
}
|
||||
fn primitive_to_str(&self, prim: &FormatPrimitive, field: FormatField) -> String {
|
||||
primitive_to_str_common(prim, &field)
|
||||
}
|
||||
}
|
||||
|
||||
// c99 hex has unique requirements of all floating point subs in pretty much every part of building a primitive, from prefix and suffix to need for base conversion (in all other cases if you don't have decimal you must have decimal, here it's the other way around)
|
||||
|
||||
// on the todo list is to have a trait for get_primitive that is implemented by each float formatter and can override a default. when that happens we can take the parts of get_primitive_dec specific to dec and spin them out to their own functions that can be overridden.
|
||||
fn get_primitive_hex(
|
||||
initial_prefix: &InitialPrefix,
|
||||
_str_in: &str,
|
||||
_analysis: &FloatAnalysis,
|
||||
_last_dec_place: usize,
|
||||
capitalized: bool,
|
||||
) -> FormatPrimitive {
|
||||
let prefix = Some(String::from(if initial_prefix.sign == -1 {
|
||||
"-0x"
|
||||
} else {
|
||||
"0x"
|
||||
}));
|
||||
|
||||
// TODO actual conversion, make sure to get back mantissa.
|
||||
// for hex to hex, it's really just a matter of moving the
|
||||
// decimal point and calculating the mantissa by its initial
|
||||
// position and its moves, with every position counting for
|
||||
// the addition or subtraction of 4 (2**4, because 4 bits in a hex digit)
|
||||
// to the exponent.
|
||||
// decimal's going to be a little more complicated. correct simulation
|
||||
// of glibc will require after-decimal division to a specified precision.
|
||||
// the difficult part of this (arrnum_int_div_step) is already implemented.
|
||||
|
||||
// the hex float name may be a bit misleading in terms of how to go about the
|
||||
// conversion. The best way to do it is to just convert the float number
|
||||
// directly to base 2 and then at the end translate back to hex.
|
||||
let mantissa = 0;
|
||||
let suffix = Some({
|
||||
let ind = if capitalized { "P" } else { "p" };
|
||||
if mantissa >= 0 {
|
||||
format!("{ind}+{mantissa}")
|
||||
} else {
|
||||
format!("{ind}{mantissa}")
|
||||
}
|
||||
});
|
||||
FormatPrimitive {
|
||||
prefix,
|
||||
suffix,
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
fn to_hex(src: &str, before_decimal: bool) -> String {
|
||||
let radix_ten = base_conv::RadixTen;
|
||||
let radix_hex = base_conv::RadixHex;
|
||||
if before_decimal {
|
||||
base_conv::base_conv_str(src, &radix_ten, &radix_hex)
|
||||
} else {
|
||||
let as_arrnum_ten = base_conv::str_to_arrnum(src, &radix_ten);
|
||||
let s = format!(
|
||||
"{}",
|
||||
base_conv::base_conv_float(&as_arrnum_ten, radix_ten.get_max(), radix_hex.get_max())
|
||||
);
|
||||
if s.len() > 2 {
|
||||
String::from(&s[2..])
|
||||
} else {
|
||||
// zero
|
||||
s
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,185 +0,0 @@
|
|||
// spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety
|
||||
|
||||
//! formatter for %g %G decimal subs
|
||||
use super::super::format_field::FormatField;
|
||||
use super::super::formatter::{FormatPrimitive, Formatter, InitialPrefix};
|
||||
use super::float_common::{get_primitive_dec, primitive_to_str_common, FloatAnalysis};
|
||||
|
||||
const SIGNIFICANT_FIGURES: usize = 6;
|
||||
|
||||
// Parse the leading digits of a numeric string, rounded to a given number
// of significant figures. This is a helper function for round().
// The result is NOT scaled back to the magnitude of the input.
// Examples:
// round_to_significance("456", 1) == 5
// round_to_significance("456", 2) == 46
// round_to_significance("456", 9) == 456
fn round_to_significance(input: &str, significant_figures: usize) -> u32 {
    if input.len() <= significant_figures {
        // Few enough digits: take them as they are (0 if unparsable).
        return input.parse::<u32>().unwrap_or(0);
    }
    // Too many digits: keep one extra digit and let a float intermediary
    // round it away, because converting straight to an integer would
    // truncate instead.
    let leading = &input[..=significant_figures];
    let scaled = leading.parse::<f32>().unwrap();
    (scaled / 10.0).round() as u32
}
|
||||
|
||||
// Removing trailing zeroes, expressing the result as an integer where
|
||||
// possible. This is a helper function for round().
|
||||
fn truncate(mut format: FormatPrimitive) -> FormatPrimitive {
|
||||
if let Some(ref post_dec) = format.post_decimal {
|
||||
let trimmed = post_dec.trim_end_matches('0');
|
||||
|
||||
if trimmed.is_empty() {
|
||||
// If there are no nonzero digits after the decimal point,
|
||||
// use integer formatting by clearing post_decimal and suffix.
|
||||
format.post_decimal = Some(String::new());
|
||||
if format.suffix == Some("e+00".into()) {
|
||||
format.suffix = Some(String::new());
|
||||
}
|
||||
} else if trimmed.len() != post_dec.len() {
|
||||
// Otherwise, update the format to remove only the trailing
|
||||
// zeroes (e.g. "4.50" becomes "4.5", not "4"). If there were
|
||||
// no trailing zeroes, do nothing.
|
||||
format.post_decimal = Some(trimmed.to_owned());
|
||||
}
|
||||
}
|
||||
format
|
||||
}
|
||||
|
||||
// Round a format to six significant figures and remove trailing zeroes.
|
||||
fn round(mut format: FormatPrimitive) -> FormatPrimitive {
|
||||
let mut significant_digits_remaining = SIGNIFICANT_FIGURES;
|
||||
|
||||
// First, take as many significant digits as possible from pre_decimal,
|
||||
if format.pre_decimal.is_some() {
|
||||
let input = format.pre_decimal.as_ref().unwrap();
|
||||
let rounded = round_to_significance(input, significant_digits_remaining);
|
||||
let mut rounded_str = rounded.to_string();
|
||||
significant_digits_remaining -= rounded_str.len();
|
||||
|
||||
// If the pre_decimal has exactly enough significant digits,
|
||||
// round the input to the nearest integer. If the first
|
||||
// post_decimal digit is 5 or higher, round up by incrementing
|
||||
// the pre_decimal number. Otherwise, use the pre_decimal as-is.
|
||||
if significant_digits_remaining == 0 {
|
||||
if let Some(digits) = &format.post_decimal {
|
||||
if digits.chars().next().unwrap_or('0') >= '5' {
|
||||
let rounded = rounded + 1;
|
||||
rounded_str = rounded.to_string();
|
||||
}
|
||||
}
|
||||
}
|
||||
format.pre_decimal = Some(rounded_str);
|
||||
}
|
||||
|
||||
// If no significant digits remain, or there's no post_decimal to
|
||||
// round, return the rounded pre_decimal value with no post_decimal.
|
||||
// Otherwise, round the post_decimal to the remaining significance.
|
||||
if significant_digits_remaining == 0 {
|
||||
format.post_decimal = Some(String::new());
|
||||
} else if let Some(input) = format.post_decimal {
|
||||
let leading_zeroes = input.len() - input.trim_start_matches('0').len();
|
||||
let digits = &input[leading_zeroes..];
|
||||
|
||||
// In the post_decimal, leading zeroes are significant. "01.0010"
|
||||
// has one significant digit in pre_decimal, and 3 from post_decimal.
|
||||
let mut post_decimal_str = String::with_capacity(significant_digits_remaining);
|
||||
for _ in 0..leading_zeroes {
|
||||
post_decimal_str.push('0');
|
||||
}
|
||||
|
||||
if leading_zeroes < significant_digits_remaining {
|
||||
// After significant leading zeroes, round the remaining digits
|
||||
// to any remaining significance.
|
||||
let rounded = round_to_significance(digits, significant_digits_remaining);
|
||||
post_decimal_str.push_str(&rounded.to_string());
|
||||
} else if leading_zeroes == significant_digits_remaining
|
||||
&& digits.chars().next().unwrap_or('0') >= '5'
|
||||
{
|
||||
// If necessary, round up the post_decimal ("1.000009" should
|
||||
// round to 1.00001, instead of truncating after the last
|
||||
// significant leading zero).
|
||||
post_decimal_str.pop();
|
||||
post_decimal_str.push('1');
|
||||
} else {
|
||||
// If the rounded post_decimal is entirely zeroes, discard
|
||||
// it and use integer formatting instead.
|
||||
post_decimal_str = String::new();
|
||||
}
|
||||
|
||||
format.post_decimal = Some(post_decimal_str);
|
||||
}
|
||||
truncate(format)
|
||||
}
|
||||
|
||||
// Given an exponent suffix used in scientific notation (such as "e+05" or
// "E-07"), return whether the number is small enough to be expressed as a
// decimal instead. "Small enough" is based only on the number's magnitude
// (an exponent from -4 to 5 inclusive), not the length of any string
// representation.
//
// The exponent is compared numerically, so the upper-case suffixes produced
// for %G are handled the same way as the lower-case ones. A missing suffix,
// or one whose exponent cannot be parsed, is treated as decimal.
fn should_represent_as_decimal(suffix: &Option<String>) -> bool {
    match suffix {
        Some(exponent) => {
            // Skip the 'e' / 'E' indicator; the rest is a signed integer.
            let parsed = exponent
                .get(1..)
                .and_then(|digits| digits.parse::<i32>().ok());
            match parsed {
                Some(value) => (-4..=5).contains(&value),
                None => true,
            }
        }
        None => true,
    }
}
|
||||
|
||||
/// Formatter for the `%g` / `%G` decimal substitutions.
pub struct Decf;

impl Decf {
    /// Creates the (stateless) formatter.
    pub fn new() -> Self {
        Decf
    }
}
|
||||
impl Formatter for Decf {
|
||||
fn get_primitive(
|
||||
&self,
|
||||
field: &FormatField,
|
||||
initial_prefix: &InitialPrefix,
|
||||
str_in: &str,
|
||||
) -> Option<FormatPrimitive> {
|
||||
let second_field = field.second_field.unwrap_or(6) + 1;
|
||||
// default to scif interpretation so as to not truncate input vals
|
||||
// (that would be displayed in scif) based on relation to decimal place
|
||||
let analysis = FloatAnalysis::analyze(
|
||||
str_in,
|
||||
initial_prefix,
|
||||
Some(second_field as usize + 1),
|
||||
None,
|
||||
false,
|
||||
);
|
||||
let mut f_dec = get_primitive_dec(
|
||||
initial_prefix,
|
||||
&str_in[initial_prefix.offset..],
|
||||
&analysis,
|
||||
second_field as usize,
|
||||
Some(*field.field_char == 'G'),
|
||||
);
|
||||
|
||||
if should_represent_as_decimal(&f_dec.suffix) {
|
||||
// Use decimal formatting instead of scientific notation
|
||||
// if the input's magnitude is small.
|
||||
f_dec = get_primitive_dec(
|
||||
initial_prefix,
|
||||
&str_in[initial_prefix.offset..],
|
||||
&analysis,
|
||||
second_field as usize,
|
||||
None,
|
||||
);
|
||||
}
|
||||
|
||||
Some(round(f_dec))
|
||||
}
|
||||
fn primitive_to_str(&self, prim: &FormatPrimitive, field: FormatField) -> String {
|
||||
primitive_to_str_common(prim, &field)
|
||||
}
|
||||
}
|
|
@ -1,377 +0,0 @@
|
|||
// spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety
|
||||
// spell-checker:ignore (ToDO) arrnum
|
||||
|
||||
use super::super::format_field::FormatField;
|
||||
use super::super::formatter::{
|
||||
get_it_at, warn_incomplete_conv, Base, FormatPrimitive, InitialPrefix,
|
||||
};
|
||||
use super::base_conv;
|
||||
use super::base_conv::RadixDef;
|
||||
|
||||
// if the memory, copy, and comparison cost of chars
|
||||
// becomes an issue, we can always operate in vec<u8> here
|
||||
// rather than just at de_hex
|
||||
|
||||
/// Summary of a scan over the textual digits of a float argument.
pub struct FloatAnalysis {
    /// Index at which the scan stopped (the number of characters accepted).
    pub len_important: usize,
    /// Index of the decimal point; `None` means no decimal point.
    pub decimal_pos: Option<usize>,
    /// The digit found at the index equal to the significant-digit limit,
    /// if the scan reached it.
    pub follow: Option<char>,
}
|
||||
/// Reports whether enough digits have been scanned past `starting_position`
/// to satisfy `limit`.
///
/// Hex input is counted at 9/8 of a digit each when the output is decimal.
/// Decimal input with hex output can never be decided without converting,
/// so that combination always answers `false`.
fn has_enough_digits(
    hex_input: bool,
    hex_output: bool,
    string_position: usize,
    starting_position: usize,
    limit: usize,
) -> bool {
    // -1s are for rounding
    match (hex_output, hex_input) {
        // undecidable without converting
        (true, false) => false,
        (false, true) => (((string_position - 1) - starting_position) * 9) / 8 >= limit,
        _ => (string_position - 1) - starting_position >= limit,
    }
}
|
||||
|
||||
impl FloatAnalysis {
|
||||
#[allow(clippy::cognitive_complexity)]
|
||||
pub fn analyze(
|
||||
str_in: &str,
|
||||
initial_prefix: &InitialPrefix,
|
||||
max_sd_opt: Option<usize>,
|
||||
max_after_dec_opt: Option<usize>,
|
||||
hex_output: bool,
|
||||
) -> Self {
|
||||
// this fn assumes
|
||||
// the input string
|
||||
// has no leading spaces or 0s
|
||||
let str_it = get_it_at(initial_prefix.offset, str_in);
|
||||
let mut ret = Self {
|
||||
len_important: 0,
|
||||
decimal_pos: None,
|
||||
follow: None,
|
||||
};
|
||||
let hex_input = match initial_prefix.radix_in {
|
||||
Base::Hex => true,
|
||||
Base::Ten => false,
|
||||
Base::Octal => {
|
||||
panic!("this should never happen: floats should never receive octal input");
|
||||
}
|
||||
};
|
||||
let mut i = 0;
|
||||
let mut pos_before_first_nonzero_after_decimal: Option<usize> = None;
|
||||
for c in str_it {
|
||||
match c {
|
||||
e @ ('0'..='9' | 'A'..='F' | 'a'..='f') => {
|
||||
if !hex_input {
|
||||
match e {
|
||||
'0'..='9' => {}
|
||||
_ => {
|
||||
warn_incomplete_conv(str_in);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if ret.decimal_pos.is_some()
|
||||
&& pos_before_first_nonzero_after_decimal.is_none()
|
||||
&& e != '0'
|
||||
{
|
||||
pos_before_first_nonzero_after_decimal = Some(i - 1);
|
||||
}
|
||||
if let Some(max_sd) = max_sd_opt {
|
||||
if i == max_sd {
|
||||
// follow is used in cases of %g
|
||||
// where the character right after the last
|
||||
// sd is considered is rounded affecting
|
||||
// the previous digit in 1/2 of instances
|
||||
ret.follow = Some(e);
|
||||
} else if ret.decimal_pos.is_some() && i > max_sd {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if let Some(max_after_dec) = max_after_dec_opt {
|
||||
if let Some(p) = ret.decimal_pos {
|
||||
if has_enough_digits(hex_input, hex_output, i, p, max_after_dec) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else if let Some(max_sd) = max_sd_opt {
|
||||
if let Some(p) = pos_before_first_nonzero_after_decimal {
|
||||
if has_enough_digits(hex_input, hex_output, i, p, max_sd) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
'.' => {
|
||||
if ret.decimal_pos.is_none() {
|
||||
ret.decimal_pos = Some(i);
|
||||
} else {
|
||||
warn_incomplete_conv(str_in);
|
||||
break;
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
warn_incomplete_conv(str_in);
|
||||
break;
|
||||
}
|
||||
};
|
||||
i += 1;
|
||||
}
|
||||
ret.len_important = i;
|
||||
ret
|
||||
}
|
||||
}
|
||||
|
||||
fn de_hex(src: &str, before_decimal: bool) -> String {
|
||||
let radix_ten = base_conv::RadixTen;
|
||||
let radix_hex = base_conv::RadixHex;
|
||||
if before_decimal {
|
||||
base_conv::base_conv_str(src, &radix_hex, &radix_ten)
|
||||
} else {
|
||||
let as_arrnum_hex = base_conv::str_to_arrnum(src, &radix_hex);
|
||||
let s = format!(
|
||||
"{}",
|
||||
base_conv::base_conv_float(&as_arrnum_hex, radix_hex.get_max(), radix_ten.get_max())
|
||||
);
|
||||
if s.len() > 2 {
|
||||
String::from(&s[2..])
|
||||
} else {
|
||||
// zero
|
||||
s
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Takes a string of (ASCII) digits, truncates it to `position` characters
// and bumps the last kept digit up by one. A nine rolls over to zero and
// the bump moves on to the digit before it.
// If every kept digit was a nine, the result is all zeroes, except that
// before the decimal point the most significant digit becomes a 1.
// The returned flag is true when the bump was absorbed by some digit, and
// false when it ran off the front of the string.
fn _round_str_from(in_str: &str, position: usize, before_dec: bool) -> (String, bool) {
    let kept = &in_str[..position];
    let mut rounded = String::with_capacity(position);

    match kept.bytes().rposition(|b| b != b'9') {
        Some(bump_at) => {
            // Digits before the bumped one are untouched; every digit after
            // it was a nine and becomes a zero.
            rounded.push_str(&kept[..bump_at]);
            rounded.push((kept.as_bytes()[bump_at] + 1) as char);
            rounded.push_str(&"0".repeat(position - bump_at - 1));
            (rounded, true)
        }
        None => {
            let mut zeroes = position;
            if before_dec && position > 0 {
                rounded.push('1');
                zeroes -= 1;
            }
            rounded.push_str(&"0".repeat(zeroes));
            (rounded, false)
        }
    }
}
|
||||
|
||||
fn round_terminal_digit(
|
||||
before_dec: String,
|
||||
after_dec: String,
|
||||
position: usize,
|
||||
) -> (String, String, bool) {
|
||||
if position < after_dec.len() {
|
||||
let digit_at_pos: char;
|
||||
{
|
||||
digit_at_pos = after_dec[position..=position].chars().next().expect("");
|
||||
}
|
||||
if let '5'..='9' = digit_at_pos {
|
||||
let (new_after_dec, finished_in_dec) = _round_str_from(&after_dec, position, false);
|
||||
if finished_in_dec {
|
||||
return (before_dec, new_after_dec, false);
|
||||
} else {
|
||||
let (new_before_dec, _) = _round_str_from(&before_dec, before_dec.len(), true);
|
||||
let mut dec_place_chg = false;
|
||||
let mut before_dec_chars = new_before_dec.chars();
|
||||
if before_dec_chars.next() == Some('1') && before_dec_chars.all(|c| c == '0') {
|
||||
// If the first digit is a one and remaining are zeros, we have
|
||||
// rounded to a new decimal place, so the decimal place must be updated.
|
||||
// Only update decimal place if the before decimal != 0
|
||||
dec_place_chg = before_dec != "0";
|
||||
}
|
||||
return (new_before_dec, new_after_dec, dec_place_chg);
|
||||
}
|
||||
// TODO
|
||||
}
|
||||
}
|
||||
(before_dec, after_dec, false)
|
||||
}
|
||||
|
||||
#[allow(clippy::cognitive_complexity)]
|
||||
pub fn get_primitive_dec(
|
||||
initial_prefix: &InitialPrefix,
|
||||
str_in: &str,
|
||||
analysis: &FloatAnalysis,
|
||||
last_dec_place: usize,
|
||||
sci_mode: Option<bool>,
|
||||
) -> FormatPrimitive {
|
||||
let mut f = FormatPrimitive::default();
|
||||
|
||||
// add negative sign section
|
||||
if initial_prefix.sign == -1 {
|
||||
f.prefix = Some(String::from("-"));
|
||||
}
|
||||
|
||||
// assign the digits before and after the decimal points
|
||||
// to separate slices. If no digits after decimal point,
|
||||
// assign 0
|
||||
let (mut first_segment_raw, second_segment_raw) = match analysis.decimal_pos {
|
||||
Some(pos) => (&str_in[..pos], &str_in[pos + 1..]),
|
||||
None => (str_in, "0"),
|
||||
};
|
||||
if first_segment_raw.is_empty() {
|
||||
first_segment_raw = "0";
|
||||
}
|
||||
// convert to string, de_hexifying if input is in hex // spell-checker:disable-line
|
||||
let (first_segment, second_segment) = match initial_prefix.radix_in {
|
||||
Base::Hex => (
|
||||
de_hex(first_segment_raw, true),
|
||||
de_hex(second_segment_raw, false),
|
||||
),
|
||||
_ => (
|
||||
String::from(first_segment_raw),
|
||||
String::from(second_segment_raw),
|
||||
),
|
||||
};
|
||||
let (pre_dec_unrounded, post_dec_unrounded, mut mantissa) = if sci_mode.is_some() {
|
||||
if first_segment.len() > 1 {
|
||||
let mut post_dec = String::from(&first_segment[1..]);
|
||||
post_dec.push_str(&second_segment);
|
||||
(
|
||||
String::from(&first_segment[0..1]),
|
||||
post_dec,
|
||||
first_segment.len() as isize - 1,
|
||||
)
|
||||
} else {
|
||||
match first_segment
|
||||
.chars()
|
||||
.next()
|
||||
.expect("float_common: no chars in first segment.")
|
||||
{
|
||||
'0' => {
|
||||
let it = second_segment.chars().enumerate();
|
||||
let mut m: isize = 0;
|
||||
let mut pre = String::from("0");
|
||||
let mut post = String::from("0");
|
||||
for (i, c) in it {
|
||||
match c {
|
||||
'0' => {}
|
||||
_ => {
|
||||
m = -((i as isize) + 1);
|
||||
pre = String::from(&second_segment[i..=i]);
|
||||
post = String::from(&second_segment[i + 1..]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
(pre, post, m)
|
||||
}
|
||||
_ => (first_segment, second_segment, 0),
|
||||
}
|
||||
}
|
||||
} else {
|
||||
(first_segment, second_segment, 0)
|
||||
};
|
||||
|
||||
let (pre_dec_draft, post_dec_draft, dec_place_chg) =
|
||||
round_terminal_digit(pre_dec_unrounded, post_dec_unrounded, last_dec_place - 1);
|
||||
f.post_decimal = Some(post_dec_draft);
|
||||
if let Some(capitalized) = sci_mode {
|
||||
let si_ind = if capitalized { 'E' } else { 'e' };
|
||||
// Increase the mantissa if we're adding a decimal place
|
||||
if dec_place_chg {
|
||||
mantissa += 1;
|
||||
}
|
||||
f.suffix = Some(if mantissa >= 0 {
|
||||
format!("{si_ind}+{mantissa:02}")
|
||||
} else {
|
||||
// negative sign is considered in format!s
|
||||
// leading zeroes
|
||||
format!("{si_ind}{mantissa:03}")
|
||||
});
|
||||
f.pre_decimal = Some(pre_dec_draft);
|
||||
} else if dec_place_chg {
|
||||
// We've rounded up to a new decimal place so append 0
|
||||
f.pre_decimal = Some(pre_dec_draft + "0");
|
||||
} else {
|
||||
f.pre_decimal = Some(pre_dec_draft);
|
||||
}
|
||||
|
||||
f
|
||||
}
|
||||
|
||||
/// Renders a float primitive as prefix, integer digits, optional fraction
/// and suffix.
///
/// The fraction is cut to the field's precision (default 6). It is padded
/// with zeroes up to that precision unless the field character is `g` or
/// `G`. No decimal point is written when the fraction is empty or the
/// precision is zero.
///
/// # Panics
///
/// Panics if the primitive lacks `pre_decimal` or `post_decimal`.
pub fn primitive_to_str_common(prim: &FormatPrimitive, field: &FormatField) -> String {
    let mut final_str = String::new();
    if let Some(prefix) = &prim.prefix {
        final_str.push_str(prefix);
    }
    match &prim.pre_decimal {
        Some(pre_decimal) => final_str.push_str(pre_decimal),
        None => {
            panic!(
                "error, format primitives provided to int, will, incidentally under correct \
                 behavior, always have a pre_dec value."
            );
        }
    }

    let decimal_places = field.second_field.unwrap_or(6);
    let post_decimal = match &prim.post_decimal {
        Some(post_decimal) => post_decimal,
        None => {
            panic!(
                "error, format primitives provided to int, will, incidentally under correct \
                 behavior, always have a pre_dec value."
            );
        }
    };
    if !post_decimal.is_empty() && decimal_places > 0 {
        final_str.push('.');
        let len_avail = post_decimal.len() as u32;

        if decimal_places >= len_avail {
            final_str.push_str(post_decimal);
            let is_general = *field.field_char == 'g' || *field.field_char == 'G';
            if !is_general {
                // Pad with zeroes up to the requested precision.
                let padding = decimal_places - len_avail;
                final_str.push_str(&"0".repeat(padding as usize));
            }
        } else {
            final_str.push_str(&post_decimal[0..decimal_places as usize]);
        }
    }

    if let Some(suffix) = &prim.suffix {
        final_str.push_str(suffix);
    }

    final_str
}
|
|
@ -1,43 +0,0 @@
|
|||
// spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety
|
||||
// spell-checker:ignore (ToDO) arrnum
|
||||
|
||||
//! formatter for %f %F common-notation floating-point subs
|
||||
use super::super::format_field::FormatField;
|
||||
use super::super::formatter::{FormatPrimitive, Formatter, InitialPrefix};
|
||||
use super::float_common::{get_primitive_dec, primitive_to_str_common, FloatAnalysis};
|
||||
|
||||
/// Formatter for the `%f` / `%F` common-notation floating-point substitutions.
#[derive(Default)]
pub struct Floatf;

impl Floatf {
    /// Creates the (stateless) formatter.
    pub fn new() -> Self {
        Floatf
    }
}
|
||||
impl Formatter for Floatf {
|
||||
fn get_primitive(
|
||||
&self,
|
||||
field: &FormatField,
|
||||
initial_prefix: &InitialPrefix,
|
||||
str_in: &str,
|
||||
) -> Option<FormatPrimitive> {
|
||||
let second_field = field.second_field.unwrap_or(6) + 1;
|
||||
let analysis = FloatAnalysis::analyze(
|
||||
str_in,
|
||||
initial_prefix,
|
||||
None,
|
||||
Some(second_field as usize),
|
||||
false,
|
||||
);
|
||||
let f = get_primitive_dec(
|
||||
initial_prefix,
|
||||
&str_in[initial_prefix.offset..],
|
||||
&analysis,
|
||||
second_field as usize,
|
||||
None,
|
||||
);
|
||||
Some(f)
|
||||
}
|
||||
fn primitive_to_str(&self, prim: &FormatPrimitive, field: FormatField) -> String {
|
||||
primitive_to_str_common(prim, &field)
|
||||
}
|
||||
}
|
|
@ -1,282 +0,0 @@
|
|||
// spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety
|
||||
// spell-checker:ignore (ToDO) arrnum
|
||||
|
||||
//! formatter for unsigned and signed int subs
|
||||
//! unsigned int: %X %x (hex u64) %o (octal u64) %u (base ten u64)
|
||||
//! signed int: %i %d (both base ten i64)
|
||||
use super::super::format_field::FormatField;
|
||||
use super::super::formatter::{
|
||||
get_it_at, warn_incomplete_conv, Base, FormatPrimitive, Formatter, InitialPrefix,
|
||||
};
|
||||
use std::i64;
|
||||
use std::u64;
|
||||
|
||||
/// Formatter for the integer substitutions: `%d %i` (signed) and
/// `%u %o %x %X` (unsigned).
#[derive(Default)]
pub struct Intf {
    // Not read by any of the formatter code in this file.
    _a: u32,
}
|
||||
|
||||
/// Result of scanning the digits of an integer argument.
///
/// Produced by `Intf::analyze()`.
struct IntAnalysis {
    /// The number *may* be above the maximum, but the scan alone cannot tell;
    /// the caller has to parse the digits to find out.
    check_past_max: bool,
    /// The number is known to be past the maximum.
    past_max: bool,
    /// The number is zero.
    is_zero: bool,
    /// How many digit characters were consumed while scanning.
    len_digits: u8,
}
|
||||
|
||||
impl Intf {
|
||||
pub fn new() -> Self {
|
||||
Self::default()
|
||||
}
|
||||
// take a ref to argument string, and basic information
|
||||
// about prefix (offset, radix, sign), and analyze string
|
||||
// to gain the IntAnalysis information above
|
||||
// check_past_max: true if the number *may* be above max,
|
||||
// but we don't know either way. One of several reasons
|
||||
// we may have to parse as int.
|
||||
// past_max: true if the object is past max, false if not
|
||||
// in the future we should probably combine these into an
|
||||
// Option<bool>
|
||||
// is_zero: true if number is zero, false otherwise
|
||||
// len_digits: length of digits used to create the int
|
||||
// important, for example, if we run into a non-valid character
|
||||
#[allow(clippy::cognitive_complexity)]
|
||||
fn analyze(str_in: &str, signed_out: bool, initial_prefix: &InitialPrefix) -> IntAnalysis {
|
||||
// the maximum number of digits we could conceivably
|
||||
// have before the decimal point without exceeding the
|
||||
// max
|
||||
let mut str_it = get_it_at(initial_prefix.offset, str_in);
|
||||
let max_sd_in = if signed_out {
|
||||
match initial_prefix.radix_in {
|
||||
Base::Ten => 19,
|
||||
Base::Octal => 21,
|
||||
Base::Hex => 16,
|
||||
}
|
||||
} else {
|
||||
match initial_prefix.radix_in {
|
||||
Base::Ten => 20,
|
||||
Base::Octal => 22,
|
||||
Base::Hex => 16,
|
||||
}
|
||||
};
|
||||
let mut ret = IntAnalysis {
|
||||
check_past_max: false,
|
||||
past_max: false,
|
||||
is_zero: false,
|
||||
len_digits: 0,
|
||||
};
|
||||
|
||||
// todo turn this to a while let now that we know
|
||||
// no special behavior on EOI break
|
||||
loop {
|
||||
let c_opt = str_it.next();
|
||||
if let Some(c) = c_opt {
|
||||
match c {
|
||||
'0'..='9' | 'a'..='f' | 'A'..='F' => {
|
||||
if ret.len_digits == 0 && c == '0' {
|
||||
ret.is_zero = true;
|
||||
} else if ret.is_zero {
|
||||
ret.is_zero = false;
|
||||
}
|
||||
ret.len_digits += 1;
|
||||
if ret.len_digits == max_sd_in {
|
||||
if let Some(next_ch) = str_it.next() {
|
||||
match next_ch {
|
||||
'0'..='9' => {
|
||||
ret.past_max = true;
|
||||
}
|
||||
_ => {
|
||||
// force conversion
|
||||
// to check if its above max.
|
||||
// todo: spin out convert
|
||||
// into fn, call it here to try
|
||||
// read val, on Ok()
|
||||
// save val for reuse later
|
||||
// that way on same-base in and out
|
||||
// we don't needlessly convert int
|
||||
// to str, we can just copy it over.
|
||||
ret.check_past_max = true;
|
||||
str_it.put_back(next_ch);
|
||||
}
|
||||
}
|
||||
if ret.past_max {
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
ret.check_past_max = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
warn_incomplete_conv(str_in);
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// breaks on EOL
|
||||
break;
|
||||
}
|
||||
}
|
||||
ret
|
||||
}
|
||||
// get a FormatPrimitive of the maximum value for the field char
|
||||
// and given sign
|
||||
fn get_max(field_char: char, sign: i8) -> FormatPrimitive {
|
||||
let mut fmt_primitive = FormatPrimitive::default();
|
||||
fmt_primitive.pre_decimal = Some(String::from(match field_char {
|
||||
'd' | 'i' => match sign {
|
||||
1 => "9223372036854775807",
|
||||
_ => {
|
||||
fmt_primitive.prefix = Some(String::from("-"));
|
||||
"9223372036854775808"
|
||||
}
|
||||
},
|
||||
'x' | 'X' => "ffffffffffffffff",
|
||||
'o' => "1777777777777777777777",
|
||||
/* 'u' | */ _ => "18446744073709551615",
|
||||
}));
|
||||
fmt_primitive
|
||||
}
|
||||
// conv_from_segment contract:
|
||||
// 1. takes
|
||||
// - a string that begins with a non-zero digit, and proceeds
|
||||
// with zero or more following digits until the end of the string
|
||||
// - a radix to interpret those digits as
|
||||
// - a char that communicates:
|
||||
// whether to interpret+output the string as an i64 or u64
|
||||
// what radix to write the parsed number as.
|
||||
// 2. parses it as a rust integral type
|
||||
// 3. outputs FormatPrimitive with:
|
||||
// - if the string falls within bounds:
|
||||
// number parsed and written in the correct radix
|
||||
// - if the string falls outside bounds:
|
||||
// for i64 output, the int minimum or int max (depending on sign)
|
||||
// for u64 output, the u64 max in the output radix
|
||||
fn conv_from_segment(
|
||||
segment: &str,
|
||||
radix_in: Base,
|
||||
field_char: char,
|
||||
sign: i8,
|
||||
) -> FormatPrimitive {
|
||||
match field_char {
|
||||
'i' | 'd' => match i64::from_str_radix(segment, radix_in as u32) {
|
||||
Ok(i) => {
|
||||
let mut fmt_prim = FormatPrimitive::default();
|
||||
if sign == -1 {
|
||||
fmt_prim.prefix = Some(String::from("-"));
|
||||
}
|
||||
fmt_prim.pre_decimal = Some(format!("{i}"));
|
||||
fmt_prim
|
||||
}
|
||||
Err(_) => Self::get_max(field_char, sign),
|
||||
},
|
||||
_ => match u64::from_str_radix(segment, radix_in as u32) {
|
||||
Ok(u) => {
|
||||
let mut fmt_prim = FormatPrimitive::default();
|
||||
let u_f = if sign == -1 { u64::MAX - (u - 1) } else { u };
|
||||
fmt_prim.pre_decimal = Some(match field_char {
|
||||
'X' => format!("{u_f:X}"),
|
||||
'x' => format!("{u_f:x}"),
|
||||
'o' => format!("{u_f:o}"),
|
||||
_ => format!("{u_f}"),
|
||||
});
|
||||
fmt_prim
|
||||
}
|
||||
Err(_) => Self::get_max(field_char, sign),
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
impl Formatter for Intf {
|
||||
fn get_primitive(
|
||||
&self,
|
||||
field: &FormatField,
|
||||
initial_prefix: &InitialPrefix,
|
||||
str_in: &str,
|
||||
) -> Option<FormatPrimitive> {
|
||||
let begin = initial_prefix.offset;
|
||||
|
||||
// get information about the string. see Intf::Analyze
|
||||
// def above.
|
||||
let convert_hints = Self::analyze(
|
||||
str_in,
|
||||
*field.field_char == 'i' || *field.field_char == 'd',
|
||||
initial_prefix,
|
||||
);
|
||||
// We always will have a format primitive to return
|
||||
Some(if convert_hints.len_digits == 0 || convert_hints.is_zero {
|
||||
// if non-digit or end is reached before a non-zero digit
|
||||
FormatPrimitive {
|
||||
pre_decimal: Some(String::from("0")),
|
||||
..Default::default()
|
||||
}
|
||||
} else if !convert_hints.past_max {
|
||||
// if the number is or may be below the bounds limit
|
||||
let radix_out = match *field.field_char {
|
||||
'd' | 'i' | 'u' => Base::Ten,
|
||||
'x' | 'X' => Base::Hex,
|
||||
/* 'o' | */ _ => Base::Octal,
|
||||
};
|
||||
let radix_mismatch = !radix_out.eq(&initial_prefix.radix_in);
|
||||
let decrease_from_max: bool = initial_prefix.sign == -1 && *field.field_char != 'i';
|
||||
let end = begin + convert_hints.len_digits as usize;
|
||||
|
||||
// convert to int if any one of these is true:
|
||||
// - number of digits in int indicates it may be past max
|
||||
// - we're subtracting from the max
|
||||
// - we're converting the base
|
||||
if convert_hints.check_past_max || decrease_from_max || radix_mismatch {
|
||||
// radix of in and out is the same.
|
||||
let segment = String::from(&str_in[begin..end]);
|
||||
Self::conv_from_segment(
|
||||
&segment,
|
||||
initial_prefix.radix_in.clone(),
|
||||
*field.field_char,
|
||||
initial_prefix.sign,
|
||||
)
|
||||
} else {
|
||||
// otherwise just do a straight string copy.
|
||||
let mut fmt_prim = FormatPrimitive::default();
|
||||
|
||||
// this is here and not earlier because
|
||||
// zero doesn't get a sign, and conv_from_segment
|
||||
// creates its format primitive separately
|
||||
if initial_prefix.sign == -1 && *field.field_char == 'i' {
|
||||
fmt_prim.prefix = Some(String::from("-"));
|
||||
}
|
||||
fmt_prim.pre_decimal = Some(String::from(&str_in[begin..end]));
|
||||
fmt_prim
|
||||
}
|
||||
} else {
|
||||
Self::get_max(*field.field_char, initial_prefix.sign)
|
||||
})
|
||||
}
|
||||
/// Renders an integer primitive as `prefix` + zero padding + digits.
///
/// For integers the second field is a zero-padded minimum digit count,
/// applied here before the general minimum width is handled by the caller.
///
/// # Panics
///
/// Panics if the primitive has no `pre_decimal` value.
fn primitive_to_str(&self, prim: &FormatPrimitive, field: FormatField) -> String {
    let pre_decimal = prim.pre_decimal.as_ref().unwrap_or_else(|| {
        panic!(
            "error, format primitives provided to int, will, incidentally under \
             correct behavior, always have a pre_dec value."
        )
    });
    // number of zeroes needed to reach the requested digit count, if any
    let zero_count = field
        .second_field
        .map_or(0, |min| min.saturating_sub(pre_decimal.len() as u32));

    let mut rendered = String::new();
    if let Some(prefix) = &prim.prefix {
        rendered.push_str(prefix);
    }
    rendered.extend(std::iter::repeat('0').take(zero_count as usize));
    rendered.push_str(pre_decimal);
    rendered
}
|
||||
}
|
|
@ -1,9 +0,0 @@
|
|||
// spell-checker:ignore (vars) charf cninetyninehexfloatf decf floatf intf scif strf Cninety
|
||||
|
||||
mod base_conv;
|
||||
pub mod cninetyninehexfloatf;
|
||||
pub mod decf;
|
||||
mod float_common;
|
||||
pub mod floatf;
|
||||
pub mod intf;
|
||||
pub mod scif;
|
|
@ -1,43 +0,0 @@
|
|||
// spell-checker:ignore (vars) charf cninetyninehexfloatf decf floatf intf scif strf Cninety
|
||||
|
||||
//! formatter for %e %E scientific notation subs
|
||||
use super::super::format_field::FormatField;
|
||||
use super::super::formatter::{FormatPrimitive, Formatter, InitialPrefix};
|
||||
use super::float_common::{get_primitive_dec, primitive_to_str_common, FloatAnalysis};
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct Scif;
|
||||
|
||||
impl Scif {
|
||||
pub fn new() -> Self {
|
||||
Self
|
||||
}
|
||||
}
|
||||
impl Formatter for Scif {
|
||||
fn get_primitive(
|
||||
&self,
|
||||
field: &FormatField,
|
||||
initial_prefix: &InitialPrefix,
|
||||
str_in: &str,
|
||||
) -> Option<FormatPrimitive> {
|
||||
let second_field = field.second_field.unwrap_or(6) + 1;
|
||||
let analysis = FloatAnalysis::analyze(
|
||||
str_in,
|
||||
initial_prefix,
|
||||
Some(second_field as usize + 1),
|
||||
None,
|
||||
false,
|
||||
);
|
||||
let f = get_primitive_dec(
|
||||
initial_prefix,
|
||||
&str_in[initial_prefix.offset..],
|
||||
&analysis,
|
||||
second_field as usize,
|
||||
Some(*field.field_char == 'E'),
|
||||
);
|
||||
Some(f)
|
||||
}
|
||||
fn primitive_to_str(&self, prim: &FormatPrimitive, field: FormatField) -> String {
|
||||
primitive_to_str_common(prim, &field)
|
||||
}
|
||||
}
|
|
@ -1,4 +0,0 @@
|
|||
pub mod format_field;
|
||||
mod formatter;
|
||||
mod formatters;
|
||||
pub mod num_format;
|
|
@ -1,271 +0,0 @@
|
|||
// spell-checker:ignore (vars) charf cninetyninehexfloatf decf floatf intf scif strf Cninety
|
||||
|
||||
//! handles creating printed output for numeric substitutions
|
||||
|
||||
// spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety
|
||||
|
||||
use std::env;
|
||||
use std::vec::Vec;
|
||||
|
||||
use crate::display::Quotable;
|
||||
use crate::{show_error, show_warning};
|
||||
|
||||
use super::format_field::{FieldType, FormatField};
|
||||
use super::formatter::{Base, FormatPrimitive, Formatter, InitialPrefix};
|
||||
use super::formatters::cninetyninehexfloatf::CninetyNineHexFloatf;
|
||||
use super::formatters::decf::Decf;
|
||||
use super::formatters::floatf::Floatf;
|
||||
use super::formatters::intf::Intf;
|
||||
use super::formatters::scif::Scif;
|
||||
|
||||
pub fn warn_expected_numeric(pf_arg: &str) {
|
||||
// important: keep println here not print
|
||||
show_error!("{}: expected a numeric value", pf_arg.maybe_quote());
|
||||
}
|
||||
|
||||
// when character constant arguments have excess characters
|
||||
// issue a warning when POSIXLY_CORRECT is not set
|
||||
fn warn_char_constant_ign(remaining_bytes: &[u8]) {
|
||||
match env::var("POSIXLY_CORRECT") {
|
||||
Ok(_) => {}
|
||||
Err(e) => {
|
||||
if let env::VarError::NotPresent = e {
|
||||
show_warning!(
|
||||
"{:?}: character(s) following character \
|
||||
constant have been ignored",
|
||||
remaining_bytes
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// this function looks at the first few
|
||||
// characters of an argument and returns a value if we can learn
|
||||
// a value from that (e.g. no argument? return 0, char constant? ret value)
|
||||
fn get_provided(str_in_opt: Option<&String>) -> Option<u8> {
|
||||
const C_S_QUOTE: u8 = 39;
|
||||
const C_D_QUOTE: u8 = 34;
|
||||
match str_in_opt {
|
||||
Some(str_in) => {
|
||||
let mut byte_it = str_in.bytes();
|
||||
if let Some(ch) = byte_it.next() {
|
||||
match ch {
|
||||
C_S_QUOTE | C_D_QUOTE => {
|
||||
Some(match byte_it.next() {
|
||||
Some(second_byte) => {
|
||||
let mut ignored: Vec<u8> = Vec::new();
|
||||
for cont in byte_it {
|
||||
ignored.push(cont);
|
||||
}
|
||||
if !ignored.is_empty() {
|
||||
warn_char_constant_ign(&ignored);
|
||||
}
|
||||
second_byte
|
||||
}
|
||||
// no byte after quote
|
||||
None => {
|
||||
let so_far = (ch as char).to_string();
|
||||
warn_expected_numeric(&so_far);
|
||||
0_u8
|
||||
}
|
||||
})
|
||||
}
|
||||
// first byte is not quote
|
||||
_ => None, // no first byte
|
||||
}
|
||||
} else {
|
||||
Some(0_u8)
|
||||
}
|
||||
}
|
||||
None => Some(0),
|
||||
}
|
||||
}
|
||||
|
||||
// takes a string and returns
|
||||
// a sign,
|
||||
// a base,
|
||||
// and an offset for index after all
|
||||
// initial spacing, sign, base prefix, and leading zeroes
|
||||
#[allow(clippy::cognitive_complexity)]
|
||||
fn get_initial_prefix(str_in: &str, field_type: &FieldType) -> InitialPrefix {
|
||||
let mut str_it = str_in.chars();
|
||||
let mut ret = InitialPrefix {
|
||||
radix_in: Base::Ten,
|
||||
sign: 1,
|
||||
offset: 0,
|
||||
};
|
||||
let mut top_char = str_it.next();
|
||||
// skip spaces and ensure top_char is the first non-space char
|
||||
// (or None if none exists)
|
||||
while let Some(' ') = top_char {
|
||||
ret.offset += 1;
|
||||
top_char = str_it.next();
|
||||
}
|
||||
// parse sign
|
||||
match top_char {
|
||||
Some('+') => {
|
||||
ret.offset += 1;
|
||||
top_char = str_it.next();
|
||||
}
|
||||
Some('-') => {
|
||||
ret.sign = -1;
|
||||
ret.offset += 1;
|
||||
top_char = str_it.next();
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
// we want to exit with offset being
|
||||
// the index of the first non-zero
|
||||
// digit before the decimal point or
|
||||
// if there is none, the zero before the
|
||||
// decimal point, or, if there is none,
|
||||
// the decimal point.
|
||||
|
||||
// while we are determining the offset
|
||||
// we will ensure as a convention
|
||||
// the offset is always on the first character
|
||||
// that we are yet unsure if it is the
|
||||
// final offset. If the zero could be before
|
||||
// a decimal point we don't move past the zero.
|
||||
let mut is_hex = false;
|
||||
if Some('0') == top_char {
|
||||
if let Some(base) = str_it.next() {
|
||||
// lead zeroes can only exist in
|
||||
// octal and hex base
|
||||
let mut do_clean_lead_zeroes = false;
|
||||
match base {
|
||||
'x' | 'X' => {
|
||||
is_hex = true;
|
||||
ret.offset += 2;
|
||||
ret.radix_in = Base::Hex;
|
||||
do_clean_lead_zeroes = true;
|
||||
}
|
||||
e @ '0'..='9' => {
|
||||
ret.offset += 1;
|
||||
if let FieldType::Intf = *field_type {
|
||||
ret.radix_in = Base::Octal;
|
||||
}
|
||||
if e == '0' {
|
||||
do_clean_lead_zeroes = true;
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
if do_clean_lead_zeroes {
|
||||
let mut first = true;
|
||||
for ch_zero in str_it {
|
||||
// see notes on offset above:
|
||||
// this is why the offset for octal and decimal numbers
|
||||
// that reach this branch is 1 even though
|
||||
// they have already eaten the characters '00'
|
||||
// this is also why when hex encounters its
|
||||
// first zero it does not move its offset
|
||||
// forward because it does not know for sure
|
||||
// that it's current offset (of that zero)
|
||||
// is not the final offset,
|
||||
// whereas at that point octal knows its
|
||||
// current offset is not the final offset.
|
||||
match ch_zero {
|
||||
'0' => {
|
||||
if !(is_hex && first) {
|
||||
ret.offset += 1;
|
||||
}
|
||||
}
|
||||
// if decimal, keep last zero if one exists
|
||||
// (it's possible for last zero to
|
||||
// not exist at this branch if we're in hex input)
|
||||
'.' => break,
|
||||
// other digit, etc.
|
||||
_ => {
|
||||
if !(is_hex && first) {
|
||||
ret.offset += 1;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
if first {
|
||||
first = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
ret
|
||||
}
|
||||
|
||||
// this is the function a Sub's print will delegate to
|
||||
// if it is a numeric field, passing the field details
|
||||
// and an iterator to the argument
|
||||
pub fn num_format(field: &FormatField, in_str_opt: Option<&String>) -> Option<String> {
|
||||
let field_char = field.field_char;
|
||||
|
||||
// num format mainly operates by further delegating to one of
|
||||
// several Formatter structs depending on the field
|
||||
// see formatter.rs for more details
|
||||
|
||||
// to do switch to static dispatch
|
||||
let formatter: Box<dyn Formatter> = match *field.field_type {
|
||||
FieldType::Intf => Box::new(Intf::new()),
|
||||
FieldType::Floatf => Box::new(Floatf::new()),
|
||||
FieldType::CninetyNineHexFloatf => Box::new(CninetyNineHexFloatf::new()),
|
||||
FieldType::Scif => Box::new(Scif::new()),
|
||||
FieldType::Decf => Box::new(Decf::new()),
|
||||
_ => {
|
||||
panic!("asked to do num format with non-num field type");
|
||||
}
|
||||
};
|
||||
let prim_opt=
|
||||
// if we can get an assumed value from looking at the first
|
||||
// few characters, use that value to create the FormatPrimitive
|
||||
if let Some(provided_num) = get_provided(in_str_opt) {
|
||||
let mut tmp = FormatPrimitive::default();
|
||||
match field_char {
|
||||
'u' | 'i' | 'd' => {
|
||||
tmp.pre_decimal = Some(
|
||||
format!("{provided_num}"));
|
||||
},
|
||||
'x' | 'X' => {
|
||||
tmp.pre_decimal = Some(
|
||||
format!("{provided_num:x}"));
|
||||
},
|
||||
'o' => {
|
||||
tmp.pre_decimal = Some(
|
||||
format!("{provided_num:o}"));
|
||||
},
|
||||
'e' | 'E' | 'g' | 'G' => {
|
||||
let as_str = format!("{provided_num}");
|
||||
let initial_prefix = get_initial_prefix(
|
||||
&as_str,
|
||||
field.field_type
|
||||
);
|
||||
tmp=formatter.get_primitive(field, &initial_prefix, &as_str)
|
||||
.expect("err during default provided num");
|
||||
},
|
||||
_ => {
|
||||
tmp.pre_decimal = Some(
|
||||
format!("{provided_num}"));
|
||||
tmp.post_decimal = Some(String::from("0"));
|
||||
}
|
||||
}
|
||||
Some(tmp)
|
||||
} else {
|
||||
// otherwise we'll interpret the argument as a number
|
||||
// using the appropriate Formatter
|
||||
let in_str = in_str_opt.expect(
|
||||
"please send the devs this message:
|
||||
\n get_provided is failing to ret as Some(0) on no str ");
|
||||
// first get information about the beginning of the
|
||||
// numeric argument that would be useful for
|
||||
// any formatter (int or float)
|
||||
let initial_prefix = get_initial_prefix(
|
||||
in_str,
|
||||
field.field_type
|
||||
);
|
||||
// then get the FormatPrimitive from the Formatter
|
||||
formatter.get_primitive(field, &initial_prefix, in_str)
|
||||
};
|
||||
// if we have a formatPrimitive, print its results
|
||||
// according to the field-char appropriate Formatter
|
||||
prim_opt.map(|prim| formatter.primitive_to_str(&prim, field.clone()))
|
||||
}
|
|
@ -1,452 +0,0 @@
|
|||
// spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety
|
||||
|
||||
//! Sub is a token that represents a
|
||||
//! segment of the format string that is a substitution
|
||||
//! it is created by Sub's implementation of the Tokenizer trait
|
||||
//! Subs which have numeric field chars make use of the num_format
|
||||
//! submodule
|
||||
use crate::error::{UError, UResult};
|
||||
use itertools::{put_back_n, PutBackN};
|
||||
use std::error::Error;
|
||||
use std::fmt::Display;
|
||||
use std::io::Write;
|
||||
use std::iter::Peekable;
|
||||
use std::process::exit;
|
||||
use std::slice::Iter;
|
||||
use std::str::Chars;
|
||||
// use std::collections::HashSet;
|
||||
|
||||
use super::num_format::format_field::{FieldType, FormatField};
|
||||
use super::num_format::num_format;
|
||||
use super::token;
|
||||
use super::unescaped_text::UnescapedText;
|
||||
|
||||
const EXIT_ERR: i32 = 1;
|
||||
|
||||
/// Error raised while parsing a `%` substitution of the format string.
#[derive(Debug)]
pub enum SubError {
    /// The conversion specification is invalid. Holds the specification
    /// text without its leading `%`, which `Display` adds back.
    InvalidSpec(String),
}

impl Display for SubError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
        // Irrefutable today; adding a variant turns this into a compile error.
        let Self::InvalidSpec(spec) = self;
        write!(f, "%{spec}: invalid conversion specification")
    }
}

impl Error for SubError {}
|
||||
|
||||
// Empty impl: `SubError` overrides nothing and takes every `UError` item from the trait's defaults.
impl UError for SubError {}
|
||||
|
||||
fn convert_asterisk_arg_int(asterisk_arg: &str) -> isize {
|
||||
// this is a costly way to parse the
|
||||
// args used for asterisk values into integers
|
||||
// from various bases. Actually doing it correctly
|
||||
// (going through the pipeline to intf, but returning
|
||||
// the integer instead of writing it to string and then
|
||||
// back) is on the refactoring TODO
|
||||
let field_type = FieldType::Intf;
|
||||
let field_char = 'i';
|
||||
let field_info = FormatField {
|
||||
min_width: Some(0),
|
||||
second_field: Some(0),
|
||||
orig: &asterisk_arg.to_string(),
|
||||
field_type: &field_type,
|
||||
field_char: &field_char,
|
||||
};
|
||||
num_format::num_format(&field_info, Some(&asterisk_arg.to_string()))
|
||||
.unwrap()
|
||||
.parse::<isize>()
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
/// A field value that is either written in the format string or deferred
/// to an argument with `*`.
pub enum CanAsterisk<T> {
    /// The value was given directly in the format string.
    Fixed(T),
    /// The value is `*`; it is read from the next argument when writing.
    Asterisk,
}
|
||||
|
||||
// Sub is a tokenizer which creates tokens
|
||||
// for substitution segments of a format string
|
||||
pub struct Sub {
|
||||
min_width: CanAsterisk<Option<isize>>,
|
||||
second_field: CanAsterisk<Option<u32>>,
|
||||
field_char: char,
|
||||
field_type: FieldType,
|
||||
orig: String,
|
||||
prefix_char: char,
|
||||
}
|
||||
impl Sub {
|
||||
pub fn new(
|
||||
min_width: CanAsterisk<Option<isize>>,
|
||||
second_field: CanAsterisk<Option<u32>>,
|
||||
field_char: char,
|
||||
orig: String,
|
||||
prefix_char: char,
|
||||
) -> Self {
|
||||
// for more dry printing, field characters are grouped
|
||||
// in initialization of token.
|
||||
let field_type = match field_char {
|
||||
's' | 'b' => FieldType::Strf,
|
||||
'd' | 'i' | 'u' | 'o' | 'x' | 'X' => FieldType::Intf,
|
||||
'f' | 'F' => FieldType::Floatf,
|
||||
'a' | 'A' => FieldType::CninetyNineHexFloatf,
|
||||
'e' | 'E' => FieldType::Scif,
|
||||
'g' | 'G' => FieldType::Decf,
|
||||
'c' => FieldType::Charf,
|
||||
_ => {
|
||||
// should be unreachable.
|
||||
println!("Invalid field type");
|
||||
exit(EXIT_ERR);
|
||||
}
|
||||
};
|
||||
Self {
|
||||
min_width,
|
||||
second_field,
|
||||
field_char,
|
||||
field_type,
|
||||
orig,
|
||||
prefix_char,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Accumulates the pieces of a `%` substitution while it is being parsed.
#[derive(Default)]
pub(crate) struct SubParser {
    /// Characters collected for the minimum width (sign, digits or `*`).
    min_width_tmp: Option<String>,
    /// The minimum width was given as `*`.
    min_width_is_asterisk: bool,
    /// A `.` or a length specifier has been seen.
    past_decimal: bool,
    /// Characters collected for the second field (digits or `*`).
    second_field_tmp: Option<String>,
    /// The second field was given as `*`.
    second_field_is_asterisk: bool,
    /// A length specifier (`h`, `j`, `l`, `L`, `t`, `z`) has been seen.
    specifiers_found: bool,
    /// The conversion character, once found.
    field_char: Option<char>,
    /// Specification text consumed so far; used in error messages.
    text_so_far: String,
}
|
||||
|
||||
impl SubParser {
|
||||
fn new() -> Self {
|
||||
Self::default()
|
||||
}
|
||||
pub(crate) fn from_it<W>(
|
||||
writer: &mut W,
|
||||
it: &mut PutBackN<Chars>,
|
||||
args: &mut Peekable<Iter<String>>,
|
||||
) -> UResult<Option<token::Token>>
|
||||
where
|
||||
W: Write,
|
||||
{
|
||||
let mut parser = Self::new();
|
||||
if parser.sub_vals_retrieved(it)? {
|
||||
let t = Self::build_token(parser);
|
||||
t.write(writer, args);
|
||||
Ok(Some(t))
|
||||
} else {
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
fn build_token(parser: Self) -> token::Token {
|
||||
// not a self method so as to allow move of sub-parser vals.
|
||||
// return new Sub struct as token
|
||||
let prefix_char = match &parser.min_width_tmp {
|
||||
Some(width) if width.starts_with('0') => '0',
|
||||
_ => ' ',
|
||||
};
|
||||
|
||||
token::Token::Sub(Sub::new(
|
||||
if parser.min_width_is_asterisk {
|
||||
CanAsterisk::Asterisk
|
||||
} else {
|
||||
CanAsterisk::Fixed(
|
||||
parser
|
||||
.min_width_tmp
|
||||
.map(|x| x.parse::<isize>().unwrap_or(1)),
|
||||
)
|
||||
},
|
||||
if parser.second_field_is_asterisk {
|
||||
CanAsterisk::Asterisk
|
||||
} else {
|
||||
CanAsterisk::Fixed(parser.second_field_tmp.map(|x| x.parse::<u32>().unwrap()))
|
||||
},
|
||||
parser.field_char.unwrap(),
|
||||
parser.text_so_far,
|
||||
prefix_char,
|
||||
))
|
||||
}
|
||||
#[allow(clippy::cognitive_complexity)]
|
||||
fn sub_vals_retrieved(&mut self, it: &mut PutBackN<Chars>) -> UResult<bool> {
|
||||
if !Self::successfully_eat_prefix(it, &mut self.text_so_far)? {
|
||||
return Ok(false);
|
||||
}
|
||||
// this fn in particular is much longer than it needs to be
|
||||
// .could get a lot
|
||||
// of code savings just by cleaning it up. shouldn't use a regex
|
||||
// though, as we want to mimic the original behavior of printing
|
||||
// the field as interpreted up until the error in the field.
|
||||
|
||||
let mut legal_fields = vec![
|
||||
// 'a', 'A', //c99 hex float implementation not yet complete
|
||||
'b', 'c', 'd', 'e', 'E', 'f', 'F', 'g', 'G', 'i', 'o', 's', 'u', 'x', 'X',
|
||||
];
|
||||
let mut specifiers = vec!['h', 'j', 'l', 'L', 't', 'z'];
|
||||
legal_fields.sort_unstable();
|
||||
specifiers.sort_unstable();
|
||||
|
||||
// divide substitution from %([0-9]+)?(.[0-9+])?([a-zA-Z])
|
||||
// into min_width, second_field, field_char
|
||||
for ch in it {
|
||||
self.text_so_far.push(ch);
|
||||
match ch {
|
||||
'-' | '*' | '0'..='9' => {
|
||||
if self.past_decimal {
|
||||
// second field should never have a
|
||||
// negative value
|
||||
if self.second_field_is_asterisk || ch == '-' || self.specifiers_found {
|
||||
return Err(SubError::InvalidSpec(self.text_so_far.clone()).into());
|
||||
}
|
||||
if self.second_field_tmp.is_none() {
|
||||
self.second_field_tmp = Some(String::new());
|
||||
}
|
||||
match self.second_field_tmp.as_mut() {
|
||||
Some(x) => {
|
||||
if ch == '*' && !x.is_empty() {
|
||||
return Err(
|
||||
SubError::InvalidSpec(self.text_so_far.clone()).into()
|
||||
);
|
||||
}
|
||||
if ch == '*' {
|
||||
self.second_field_is_asterisk = true;
|
||||
}
|
||||
x.push(ch);
|
||||
}
|
||||
None => {
|
||||
panic!("should be unreachable");
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if self.min_width_is_asterisk || self.specifiers_found {
|
||||
return Err(SubError::InvalidSpec(self.text_so_far.clone()).into());
|
||||
}
|
||||
if self.min_width_tmp.is_none() {
|
||||
self.min_width_tmp = Some(String::new());
|
||||
}
|
||||
match self.min_width_tmp.as_mut() {
|
||||
Some(x) => {
|
||||
if (ch == '-' || ch == '*') && !x.is_empty() {
|
||||
return Err(
|
||||
SubError::InvalidSpec(self.text_so_far.clone()).into()
|
||||
);
|
||||
}
|
||||
if ch == '*' {
|
||||
self.min_width_is_asterisk = true;
|
||||
}
|
||||
x.push(ch);
|
||||
}
|
||||
None => {
|
||||
panic!("should be unreachable");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
'.' => {
|
||||
if self.past_decimal {
|
||||
return Err(SubError::InvalidSpec(self.text_so_far.clone()).into());
|
||||
} else {
|
||||
self.past_decimal = true;
|
||||
}
|
||||
}
|
||||
x if legal_fields.binary_search(&x).is_ok() => {
|
||||
self.field_char = Some(ch);
|
||||
self.text_so_far.push(ch);
|
||||
break;
|
||||
}
|
||||
x if specifiers.binary_search(&x).is_ok() => {
|
||||
if !self.past_decimal {
|
||||
self.past_decimal = true;
|
||||
}
|
||||
if !self.specifiers_found {
|
||||
self.specifiers_found = true;
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
return Err(SubError::InvalidSpec(self.text_so_far.clone()).into());
|
||||
}
|
||||
}
|
||||
}
|
||||
if self.field_char.is_none() {
|
||||
return Err(SubError::InvalidSpec(self.text_so_far.clone()).into());
|
||||
}
|
||||
let field_char_retrieved = self.field_char.unwrap();
|
||||
if self.past_decimal && self.second_field_tmp.is_none() {
|
||||
self.second_field_tmp = Some(String::from("0"));
|
||||
}
|
||||
self.validate_field_params(field_char_retrieved)?;
|
||||
// if the dot is provided without a second field
|
||||
// printf interprets it as 0.
|
||||
if let Some(x) = self.second_field_tmp.as_mut() {
|
||||
if x.is_empty() {
|
||||
self.min_width_tmp = Some(String::from("0"));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(true)
|
||||
}
|
||||
fn successfully_eat_prefix(
|
||||
it: &mut PutBackN<Chars>,
|
||||
text_so_far: &mut String,
|
||||
) -> UResult<bool> {
|
||||
// get next two chars,
|
||||
// if they're '%%' we're not tokenizing it
|
||||
// else put chars back
|
||||
let preface = it.next();
|
||||
let n_ch = it.next();
|
||||
if preface == Some('%') && n_ch != Some('%') {
|
||||
match n_ch {
|
||||
Some(x) => {
|
||||
it.put_back(x);
|
||||
Ok(true)
|
||||
}
|
||||
None => {
|
||||
text_so_far.push('%');
|
||||
Err(SubError::InvalidSpec(text_so_far.clone()).into())
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if let Some(x) = n_ch {
|
||||
it.put_back(x);
|
||||
};
|
||||
if let Some(x) = preface {
|
||||
it.put_back(x);
|
||||
};
|
||||
Ok(false)
|
||||
}
|
||||
}
|
||||
fn validate_field_params(&self, field_char: char) -> UResult<()> {
|
||||
// check for illegal combinations here when possible vs
|
||||
// on each application so we check less per application
|
||||
// to do: move these checks to Sub::new
|
||||
if (field_char == 's' && self.min_width_tmp == Some(String::from("0")))
|
||||
|| (field_char == 'c'
|
||||
&& (self.min_width_tmp == Some(String::from("0")) || self.past_decimal))
|
||||
|| (field_char == 'b'
|
||||
&& (self.min_width_tmp.is_some()
|
||||
|| self.past_decimal
|
||||
|| self.second_field_tmp.is_some()))
|
||||
{
|
||||
// invalid string substitution
|
||||
// to do: include information about an invalid
|
||||
// string substitution
|
||||
return Err(SubError::InvalidSpec(self.text_so_far.clone()).into());
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl Sub {
|
||||
#[allow(clippy::cognitive_complexity)]
|
||||
pub(crate) fn write<W>(&self, writer: &mut W, pf_args_it: &mut Peekable<Iter<String>>)
|
||||
where
|
||||
W: Write,
|
||||
{
|
||||
let field = FormatField {
|
||||
min_width: match self.min_width {
|
||||
CanAsterisk::Fixed(x) => x,
|
||||
CanAsterisk::Asterisk => {
|
||||
match pf_args_it.next() {
|
||||
// temporary, use intf.rs instead
|
||||
Some(x) => Some(convert_asterisk_arg_int(x)),
|
||||
None => Some(0),
|
||||
}
|
||||
}
|
||||
},
|
||||
second_field: match self.second_field {
|
||||
CanAsterisk::Fixed(x) => x,
|
||||
CanAsterisk::Asterisk => {
|
||||
match pf_args_it.next() {
|
||||
// temporary, use intf.rs instead
|
||||
Some(x) => {
|
||||
let result = convert_asterisk_arg_int(x);
|
||||
if result < 0 {
|
||||
None
|
||||
} else {
|
||||
Some(result as u32)
|
||||
}
|
||||
}
|
||||
None => Some(0),
|
||||
}
|
||||
}
|
||||
},
|
||||
field_char: &self.field_char,
|
||||
field_type: &self.field_type,
|
||||
orig: &self.orig,
|
||||
};
|
||||
let pf_arg = pf_args_it.next();
|
||||
|
||||
// minimum width is handled independently of actual
|
||||
// field char
|
||||
let pre_min_width_opt: Option<String> = match *field.field_type {
|
||||
// if %s just return arg
|
||||
// if %b use UnescapedText module's unescape-fn
|
||||
// if %c return first char of arg
|
||||
FieldType::Strf | FieldType::Charf => {
|
||||
match pf_arg {
|
||||
Some(arg_string) => {
|
||||
match *field.field_char {
|
||||
's' => Some(match field.second_field {
|
||||
Some(max) => String::from(&arg_string[..max as usize]),
|
||||
None => arg_string.clone(),
|
||||
}),
|
||||
'b' => {
|
||||
let mut a_it = put_back_n(arg_string.chars());
|
||||
UnescapedText::from_it_core(writer, &mut a_it, true);
|
||||
None
|
||||
}
|
||||
// for 'c': get iter of string vals,
|
||||
// get opt<char> of first val
|
||||
// and map it to opt<String>
|
||||
/* 'c' | */
|
||||
_ => arg_string.chars().next().map(|x| x.to_string()),
|
||||
}
|
||||
}
|
||||
None => None,
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
// non string/char fields are delegated to num_format
|
||||
num_format::num_format(&field, pf_arg)
|
||||
}
|
||||
};
|
||||
if let Some(pre_min_width) = pre_min_width_opt {
|
||||
// if have a string, print it, ensuring minimum width is met.
|
||||
write!(
|
||||
writer,
|
||||
"{}",
|
||||
match field.min_width {
|
||||
Some(min_width) => {
|
||||
let diff: isize = min_width.abs() - pre_min_width.len() as isize;
|
||||
if diff > 0 {
|
||||
let mut final_str = String::new();
|
||||
// definitely more efficient ways
|
||||
// to do this.
|
||||
let pad_before = min_width > 0;
|
||||
if !pad_before {
|
||||
final_str.push_str(&pre_min_width);
|
||||
}
|
||||
for _ in 0..diff {
|
||||
final_str.push(self.prefix_char);
|
||||
}
|
||||
if pad_before {
|
||||
final_str.push_str(&pre_min_width);
|
||||
}
|
||||
final_str
|
||||
} else {
|
||||
pre_min_width
|
||||
}
|
||||
}
|
||||
None => pre_min_width,
|
||||
}
|
||||
)
|
||||
.ok();
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,39 +0,0 @@
|
|||
//! Traits and enums dealing with Tokenization of printf Format String
|
||||
use std::io::Write;
|
||||
use std::iter::Peekable;
|
||||
use std::slice::Iter;
|
||||
|
||||
use crate::features::tokenize::sub::Sub;
|
||||
use crate::features::tokenize::unescaped_text::UnescapedText;
|
||||
|
||||
/// A token can print the expected output of one contiguous segment
|
||||
/// of the format string, and requires at most 1 argument.
|
||||
/// `Token::write` forwards to the wrapped value's own `write` method.
|
||||
pub enum Token {
|
||||
/// A substitution; its `write` is handed the writer and the argument iterator.
Sub(Sub),
|
||||
/// A text segment; its `write` is handed only the writer.
UnescapedText(UnescapedText),
|
||||
}
|
||||
|
||||
impl Token {
|
||||
pub(crate) fn write<W>(&self, writer: &mut W, args: &mut Peekable<Iter<String>>)
|
||||
where
|
||||
W: Write,
|
||||
{
|
||||
match self {
|
||||
Self::Sub(sub) => sub.write(writer, args),
|
||||
Self::UnescapedText(unescaped_text) => unescaped_text.write(writer),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// A tokenizer is an object that takes an iterator positioned somewhere
// in a format string and checks whether it can produce a token of a
// type it knows how to build. If so, it returns the token and moves the
// iterator past the format string text the token represents; if an
// argument is used, it also moves the argument iterator forward by one.

// Creating a token for a format string segment should also cause that
// token's value to be printed. Essentially, tokenizing a whole format
// string prints the format string and consumes a number of arguments
// equal to the number of argument-using tokens.
|
|
@ -1,279 +0,0 @@
|
|||
//! UnescapedText is a tokenizer impl
|
||||
//! for tokenizing character literals,
|
||||
//! and escaped character literals (of allowed escapes),
|
||||
//! into an unescaped text byte array
|
||||
|
||||
// spell-checker:ignore (ToDO) retval hexchars octals printf's bvec vals coreutil addchar eval bytecode bslice
|
||||
|
||||
use itertools::PutBackN;
|
||||
use std::char::from_u32;
|
||||
use std::io::Write;
|
||||
use std::process::exit;
|
||||
use std::str::Chars;
|
||||
|
||||
use super::token;
|
||||
|
||||
/// Exit status passed to `exit` when a `\c` escape ends processing.
const EXIT_OK: i32 = 0;
|
||||
/// Exit status passed to `exit` after reporting an invalid escape sequence.
const EXIT_ERR: i32 = 1;
|
||||
|
||||
// stdout is line-buffered by default, so output only reaches the
// console once a newline is written; flush after every write instead.
// Errors from the write and from the flush are both discarded.
macro_rules! write_and_flush {
    ($writer:expr, $($args:tt)+) => {{
        let _ = write!($writer, "{}", $($args)+);
        let _ = $writer.flush();
    }};
}
|
||||
|
||||
/// Writes all of `bslice` to `writer`, then flushes it.
///
/// Errors from either step are deliberately discarded.
fn flush_bytes<W: Write>(writer: &mut W, bslice: &[u8]) {
    let _ = writer.write_all(bslice);
    let _ = writer.flush();
}
|
||||
|
||||
/// Newtype around the `Vec<u8>` of unescaped bytes; the derived `Default` is an empty buffer.
#[derive(Default)]
|
||||
pub struct UnescapedText(Vec<u8>);
|
||||
impl UnescapedText {
|
||||
fn new() -> Self {
|
||||
Self::default()
|
||||
}
|
||||
// take an iterator to the format string
|
||||
// consume between min and max chars
|
||||
// and return it as a base-X number
|
||||
fn base_to_u32(min_chars: u8, max_chars: u8, base: u32, it: &mut PutBackN<Chars>) -> u32 {
|
||||
let mut retval: u32 = 0;
|
||||
let mut found = 0;
|
||||
while found < max_chars {
|
||||
// if end of input break
|
||||
let nc = it.next();
|
||||
match nc {
|
||||
Some(digit) => {
|
||||
// if end of hexchars break
|
||||
match digit.to_digit(base) {
|
||||
Some(d) => {
|
||||
found += 1;
|
||||
retval *= base;
|
||||
retval += d;
|
||||
}
|
||||
None => {
|
||||
it.put_back(digit);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
None => {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if found < min_chars {
|
||||
// only ever expected for hex
|
||||
println!("missing hexadecimal number in escape"); //todo stderr
|
||||
exit(EXIT_ERR);
|
||||
}
|
||||
retval
|
||||
}
|
||||
// validates against valid
|
||||
// IEC 10646 vals - these values
|
||||
// are pinned against the more popular
|
||||
// printf so as to not disrupt when
|
||||
// dropped-in as a replacement.
|
||||
fn validate_iec(val: u32, eight_word: bool) {
|
||||
let mut preface = 'u';
|
||||
let leading_zeros = if eight_word {
|
||||
preface = 'U';
|
||||
8
|
||||
} else {
|
||||
4
|
||||
};
|
||||
let err_msg = format!("invalid universal character name {preface}{val:0leading_zeros$x}");
|
||||
if (val < 159 && (val != 36 && val != 64 && val != 96)) || (val > 55296 && val < 57343) {
|
||||
println!("{err_msg}"); //todo stderr
|
||||
exit(EXIT_ERR);
|
||||
}
|
||||
}
|
||||
// pass an iterator that succeeds an '/',
|
||||
// and process the remaining character
|
||||
// adding the unescaped bytes
|
||||
// to the passed byte_vec
|
||||
// in subs_mode change octal behavior
|
||||
fn handle_escaped<W>(
|
||||
writer: &mut W,
|
||||
byte_vec: &mut Vec<u8>,
|
||||
it: &mut PutBackN<Chars>,
|
||||
subs_mode: bool,
|
||||
) where
|
||||
W: Write,
|
||||
{
|
||||
let ch = it.next().unwrap_or('\\');
|
||||
match ch {
|
||||
'0'..='9' | 'x' => {
|
||||
let min_len = 1;
|
||||
let mut max_len = 2;
|
||||
let mut base = 16;
|
||||
let ignore = false;
|
||||
match ch {
|
||||
'x' => {}
|
||||
e @ '0'..='9' => {
|
||||
max_len = 3;
|
||||
base = 8;
|
||||
// in practice, gnu coreutils printf
|
||||
// interprets octals without a
|
||||
// leading zero in %b
|
||||
// but it only skips leading zeros
|
||||
// in %b mode.
|
||||
// if we ever want to match gnu coreutil
|
||||
// printf's docs instead of its behavior
|
||||
// we'd set this to true.
|
||||
// if subs_mode && e != '0'
|
||||
// { ignore = true; }
|
||||
if !subs_mode || e != '0' {
|
||||
it.put_back(ch);
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
if ignore {
|
||||
byte_vec.push(ch as u8);
|
||||
} else {
|
||||
let val = (Self::base_to_u32(min_len, max_len, base, it) % 256) as u8;
|
||||
byte_vec.push(val);
|
||||
let bvec = [val];
|
||||
flush_bytes(writer, &bvec);
|
||||
}
|
||||
}
|
||||
e => {
|
||||
// only for hex and octal
|
||||
// is byte encoding specified.
|
||||
// otherwise, why not leave the door open
|
||||
// for other encodings unless it turns out
|
||||
// a bottleneck.
|
||||
let mut s = String::new();
|
||||
let ch = match e {
|
||||
'\\' => '\\',
|
||||
'"' => '"',
|
||||
'n' => '\n',
|
||||
'r' => '\r',
|
||||
't' => '\t',
|
||||
// bell
|
||||
'a' => '\x07',
|
||||
// backspace
|
||||
'b' => '\x08',
|
||||
// vertical tab
|
||||
'v' => '\x0B',
|
||||
// form feed
|
||||
'f' => '\x0C',
|
||||
// escape character
|
||||
'e' => '\x1B',
|
||||
'c' => exit(EXIT_OK),
|
||||
'u' | 'U' => {
|
||||
let len = match e {
|
||||
'u' => 4,
|
||||
/* 'U' | */ _ => 8,
|
||||
};
|
||||
let val = Self::base_to_u32(len, len, 16, it);
|
||||
Self::validate_iec(val, false);
|
||||
if let Some(c) = from_u32(val) {
|
||||
c
|
||||
} else {
|
||||
'-'
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
s.push('\\');
|
||||
ch
|
||||
}
|
||||
};
|
||||
s.push(ch);
|
||||
write_and_flush!(writer, &s);
|
||||
byte_vec.extend(s.bytes());
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
// take an iterator to a string,
|
||||
// and return a wrapper around a Vec<u8> of unescaped bytes
|
||||
// break on encounter of sub symbol ('%[^%]') unless called
|
||||
// through %b subst.
|
||||
#[allow(clippy::cognitive_complexity)]
|
||||
pub fn from_it_core<W>(
|
||||
writer: &mut W,
|
||||
it: &mut PutBackN<Chars>,
|
||||
subs_mode: bool,
|
||||
) -> Option<token::Token>
|
||||
where
|
||||
W: Write,
|
||||
{
|
||||
let mut addchar = false;
|
||||
let mut new_text = Self::new();
|
||||
let mut tmp_str = String::new();
|
||||
{
|
||||
let new_vec: &mut Vec<u8> = &mut (new_text.0);
|
||||
while let Some(ch) = it.next() {
|
||||
if !addchar {
|
||||
addchar = true;
|
||||
}
|
||||
match ch {
|
||||
x if x != '\\' && x != '%' => {
|
||||
// lazy branch eval
|
||||
// remember this fn could be called
|
||||
// many times in a single exec through %b
|
||||
write_and_flush!(writer, ch);
|
||||
tmp_str.push(ch);
|
||||
}
|
||||
'\\' => {
|
||||
// the literal may be a literal bytecode
|
||||
// and not valid utf-8. Str only supports
|
||||
// valid utf-8.
|
||||
// if we find the unnecessary drain
|
||||
// on non hex or octal escapes is costly
|
||||
// then we can make it faster/more complex
|
||||
// with as-necessary draining.
|
||||
if !tmp_str.is_empty() {
|
||||
new_vec.extend(tmp_str.bytes());
|
||||
tmp_str = String::new();
|
||||
}
|
||||
Self::handle_escaped(writer, new_vec, it, subs_mode);
|
||||
}
|
||||
x if x == '%' && !subs_mode => {
|
||||
if let Some(follow) = it.next() {
|
||||
if follow == '%' {
|
||||
write_and_flush!(writer, ch);
|
||||
tmp_str.push(ch);
|
||||
} else {
|
||||
it.put_back(follow);
|
||||
it.put_back(ch);
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
it.put_back(ch);
|
||||
break;
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
write_and_flush!(writer, ch);
|
||||
tmp_str.push(ch);
|
||||
}
|
||||
}
|
||||
}
|
||||
if !tmp_str.is_empty() {
|
||||
new_vec.extend(tmp_str.bytes());
|
||||
}
|
||||
}
|
||||
if addchar {
|
||||
Some(token::Token::UnescapedText(new_text))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
impl UnescapedText {
|
||||
pub(crate) fn write<W>(&self, writer: &mut W)
|
||||
where
|
||||
W: Write,
|
||||
{
|
||||
flush_bytes(writer, &self.0[..]);
|
||||
}
|
||||
}
|
|
@ -44,8 +44,8 @@ pub use crate::features::fs;
|
|||
pub use crate::features::fsext;
|
||||
#[cfg(feature = "lines")]
|
||||
pub use crate::features::lines;
|
||||
#[cfg(feature = "memo")]
|
||||
pub use crate::features::memo;
|
||||
#[cfg(feature = "format")]
|
||||
pub use crate::features::format;
|
||||
#[cfg(feature = "ringbuffer")]
|
||||
pub use crate::features::ringbuffer;
|
||||
#[cfg(feature = "sum")]
|
||||
|
|
Loading…
Reference in a new issue