uucore: start work on a completely new printf implementation

This commit is contained in:
Terts Diepraam 2023-08-02 23:57:26 +02:00
parent f37318d37c
commit a3e68d5bbd
23 changed files with 672 additions and 2714 deletions

View file

@ -76,7 +76,7 @@ entries = ["libc"]
fs = ["libc", "winapi-util", "windows-sys"]
fsext = ["libc", "time", "windows-sys"]
lines = []
memo = ["itertools"]
format = ["itertools"]
mode = ["libc"]
perms = ["libc", "walkdir"]
process = ["libc"]

View file

@ -8,14 +8,12 @@ pub mod fs;
pub mod fsext;
#[cfg(feature = "lines")]
pub mod lines;
#[cfg(feature = "memo")]
pub mod memo;
#[cfg(feature = "format")]
pub mod format;
#[cfg(feature = "ringbuffer")]
pub mod ringbuffer;
#[cfg(feature = "sum")]
pub mod sum;
#[cfg(feature = "memo")]
mod tokenize;
// * (platform-specific) feature-gated modules
// ** non-windows (i.e. Unix + Fuchsia)

View file

@ -0,0 +1,144 @@
//! Main entry point for our implementation of printf.
//!
//! The [`printf`] and [`sprintf`] functions closely match the behavior of the
//! corresponding C functions: the former renders a formatted string
//! to stdout, the latter renders into a newly allocated byte buffer (`Vec<u8>`).
//!
//! In addition to the [`printf`] and [`sprintf`] functions, we expose the
//! [`Format`] struct, which represents a parsed format string. This reduces
//! the need for parsing a format string multiple times and assures that no
//! parsing errors occur during writing.
// spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety
// mod num_format;
mod spec;
use spec::Spec;
use std::io::{stdout, Write};
/// Errors that can occur while parsing a format string or rendering it.
///
/// `Debug` is derived so that callers can use `unwrap`/`expect` on results
/// carrying this error.
#[derive(Debug)]
pub enum FormatError {
    /// A `%` directive in the format string could not be parsed.
    SpecError,
    /// Writing to the output failed.
    IoError(std::io::Error),
    /// A directive needed an argument but the argument list was exhausted.
    NoMoreArguments,
    /// The supplied argument cannot be used for the directive that consumed it.
    InvalidArgument(FormatArgument),
}

/// A single item to format
enum FormatItem {
    /// A format specifier
    Spec(Spec),
    /// Some plain text
    Text(Vec<u8>),
    /// A single character
    ///
    /// Added in addition to `Text` as an optimization.
    Char(u8),
}

/// A typed argument that can be consumed by a directive in the format string.
#[derive(Debug, Clone, PartialEq)]
pub enum FormatArgument {
    /// A single character.
    Char(char),
    /// A string.
    String(String),
    /// An unsigned integer; also the type accepted for `*` widths and precisions.
    UnsignedInt(u64),
    /// A signed integer.
    SignedInt(i64),
    /// A floating point number.
    Float(f64),
}
impl FormatItem {
    /// Renders this item into `writer`.
    ///
    /// Literal text and single bytes are copied verbatim; a format specifier
    /// is delegated to [`Spec::write`], which takes the arguments it needs
    /// from `args`. Write failures are reported as [`FormatError::IoError`].
    fn write<'a>(
        &self,
        mut writer: impl Write,
        args: &mut impl Iterator<Item = FormatArgument>,
    ) -> Result<(), FormatError> {
        let literal: &[u8] = match self {
            FormatItem::Spec(spec) => return spec.write(writer, args),
            FormatItem::Text(bytes) => bytes.as_slice(),
            FormatItem::Char(byte) => std::slice::from_ref(byte),
        };
        writer.write_all(literal).map_err(FormatError::IoError)
    }
}
/// Lazily splits `fmt` into literal text, escaped `%` characters and format
/// specifiers.
///
/// Each call to `next` consumes as much of the remaining input as belongs to
/// one item. A `%` directive that [`Spec::parse`] rejects yields
/// `Err(FormatError::SpecError)`; a lone `%` at the very end of the input is
/// emitted as a literal `%`.
fn parse_iter(fmt: &[u8]) -> impl Iterator<Item = Result<FormatItem, FormatError>> + '_ {
    let mut remaining = fmt;
    std::iter::from_fn(move || {
        if remaining.is_empty() {
            return None;
        }

        // No more directives: everything left is plain text.
        let Some(percent_at) = remaining.iter().position(|&byte| byte == b'%') else {
            let tail = std::mem::take(&mut remaining);
            return Some(Ok(FormatItem::Text(tail.to_vec())));
        };

        // Text in front of the next `%`; the `%` itself stays in `remaining`
        // and is handled by the following call.
        if percent_at > 0 {
            let (literal, from_percent) = remaining.split_at(percent_at);
            remaining = from_percent;
            return Some(Ok(FormatItem::Text(literal.to_vec())));
        }

        // `remaining` starts with `%`: skip it and look at what follows.
        remaining = &remaining[1..];
        let item = match remaining.first() {
            None => FormatItem::Char(b'%'),
            Some(b'%') => {
                remaining = &remaining[1..];
                FormatItem::Char(b'%')
            }
            Some(_) => match Spec::parse(&mut remaining) {
                Some(spec) => FormatItem::Spec(spec),
                None => return Some(Err(FormatError::SpecError)),
            },
        };
        Some(Ok(item))
    })
}
/// Write a formatted string to stdout.
///
/// `format_string` contains the template and `arguments` contains the
/// arguments to render into the template.
///
/// See also [`sprintf`], which renders into a new byte buffer instead.
///
/// # Errors
///
/// Returns a [`FormatError`] if a directive in `format_string` cannot be
/// parsed, if an argument is missing or unusable for its directive, or if
/// writing to stdout fails.
///
/// # Examples
///
/// ```rust
/// use uucore::format::{printf, FormatArgument};
///
/// let result = printf(b"hello %s", [FormatArgument::String("world".to_string())]);
/// assert!(result.is_ok());
/// // prints "hello world"
/// ```
pub fn printf(
    format_string: &[u8],
    arguments: impl IntoIterator<Item = FormatArgument>,
) -> Result<(), FormatError> {
    printf_writer(stdout(), format_string, arguments)
}
/// Renders `format_string` into `writer`, handing the arguments from `args`
/// to the directives in order.
///
/// Processing stops at the first parse or write error, which is returned.
fn printf_writer(
    mut writer: impl Write,
    format_string: &[u8],
    args: impl IntoIterator<Item = FormatArgument>,
) -> Result<(), FormatError> {
    let mut remaining_args = args.into_iter();
    parse_iter(format_string)
        .try_for_each(|parsed| parsed?.write(&mut writer, &mut remaining_args))
}
/// Render a formatted string into a new byte buffer.
///
/// `format_string` contains the template and `arguments` contains the
/// arguments to render into the template. The rendered bytes are returned
/// as a `Vec<u8>`; no UTF-8 validation is performed.
///
/// See also [`printf`], which prints to stdout.
///
/// # Errors
///
/// Returns a [`FormatError`] if a directive in `format_string` cannot be
/// parsed or if an argument is missing or unusable for its directive.
///
/// # Examples
///
/// ```rust
/// use uucore::format::{sprintf, FormatArgument};
///
/// let s = sprintf(b"hello %s", [FormatArgument::String("world".to_string())]);
/// assert_eq!(s.ok(), Some(b"hello world".to_vec()));
/// ```
pub fn sprintf(
    format_string: &[u8],
    arguments: impl IntoIterator<Item = FormatArgument>,
) -> Result<Vec<u8>, FormatError> {
    let mut buffer = Vec::new();
    printf_writer(&mut buffer, format_string, arguments)?;
    Ok(buffer)
}

View file

@ -0,0 +1,523 @@
// spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety
use super::{FormatArgument, FormatError};
use std::{fmt::Display, io::Write};
/// A parsed conversion specification (the part of a format string that
/// follows a `%`), as produced by `Spec::parse`.
///
/// Widths and precisions are `None` when absent from the format string and
/// may be `CanAsterisk::Asterisk` when they must be taken from the arguments.
pub enum Spec {
/// `%c`
Char {
width: Option<CanAsterisk<usize>>,
// Set by the `-` flag.
align_left: bool,
},
/// `%s`
String {
width: Option<CanAsterisk<usize>>,
// Set by the `-` flag.
align_left: bool,
},
/// `%d` and `%i`
SignedInt {
width: Option<CanAsterisk<usize>>,
positive_sign: PositiveSign,
alignment: NumberAlignment,
},
/// `%u`, `%o`, `%x` and `%X`
UnsignedInt {
variant: UnsignedIntVariant,
width: Option<CanAsterisk<usize>>,
alignment: NumberAlignment,
},
/// `%f`, `%e`, `%g`, `%a` and their uppercase forms
Float {
variant: FloatVariant,
case: Case,
force_decimal: ForceDecimal,
width: Option<CanAsterisk<usize>>,
positive_sign: PositiveSign,
alignment: NumberAlignment,
precision: Option<CanAsterisk<usize>>,
},
}
/// Radix (and, where applicable, letter case and prefix) used for `%u`, `%o`,
/// `%x` and `%X`.
#[derive(Clone, Copy)]
pub enum UnsignedIntVariant {
Decimal,
Octal(Prefix),
Hexadecimal(Case, Prefix),
}
/// Notation used for floating point conversions: `f`, `e`, `g` and `a`
/// respectively.
#[derive(Clone, Copy)]
pub enum FloatVariant {
Decimal,
Scientific,
Shortest,
Hexadecimal,
}
/// Letter case of the output, derived from the case of the conversion
/// character.
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum Case {
Lowercase,
Uppercase,
}
/// Whether a radix prefix was requested with the `#` flag.
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum Prefix {
No,
Yes,
}
/// Whether the decimal point must always be printed (`#` flag on floats).
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum ForceDecimal {
No,
Yes,
}
/// What to print in front of non-negative numbers: nothing, `+` (the `+`
/// flag) or a space (the space flag). `+` wins when both flags are given.
#[derive(Clone, Copy)]
pub enum PositiveSign {
None,
Plus,
Space,
}
/// How a number is placed within its field: left-aligned (`-` flag),
/// right-aligned padded with zeros (`0` flag) or right-aligned padded with
/// spaces (default). `-` wins when both flags are given.
#[derive(Clone, Copy)]
pub enum NumberAlignment {
Left,
RightSpace,
RightZero,
}
/// Precision and width specified might use an asterisk to indicate that they are
/// determined by an argument.
#[derive(Clone, Copy)]
pub enum CanAsterisk<T> {
/// The value was written literally in the format string.
Fixed(T),
/// The value has to be taken from the next argument.
Asterisk,
}
/// Size of the expected type (ignored)
///
/// We ignore this parameter entirely, but we do parse it.
/// It could be used in the future if the need arises.
// NOTE(review): `PtfDiffT` looks like a typo for `PtrDiffT`; renaming it
// also requires updating the parser that constructs it.
enum Length {
/// signed/unsigned char ("hh")
Char,
/// signed/unsigned short int ("h")
Short,
/// signed/unsigned long int ("l")
Long,
/// signed/unsigned long long int ("ll")
LongLong,
/// intmax_t ("j")
IntMaxT,
/// size_t ("z")
SizeT,
/// ptrdiff_t ("t")
PtfDiffT,
/// long double ("L")
LongDouble,
}
impl Spec {
pub fn parse(rest: &mut &[u8]) -> Option<Self> {
// Based on the C++ reference, the spec format looks like:
//
// %[flags][width][.precision][length]specifier
//
// However, we have already parsed the '%'.
let mut minus = false;
let mut plus = false;
let mut space = false;
let mut hash = false;
let mut zero = false;
while let Some(x @ (b'-' | b'+' | b' ' | b'#' | b'0')) = rest.get(0) {
match x {
b'-' => minus = true,
b'+' => plus = true,
b' ' => space = true,
b'#' => hash = true,
b'0' => zero = true,
_ => unreachable!(),
}
*rest = &rest[1..]
}
let width = eat_asterisk_or_number(rest);
let precision = if let Some(b'.') = rest.get(0) {
Some(eat_asterisk_or_number(rest).unwrap_or(CanAsterisk::Fixed(0)))
} else {
None
};
let length = rest.get(0).and_then(|c| {
Some(match c {
b'h' => {
if let Some(b'h') = rest.get(1) {
*rest = &rest[1..];
Length::Char
} else {
Length::Short
}
}
b'l' => {
if let Some(b'l') = rest.get(1) {
*rest = &rest[1..];
Length::Long
} else {
Length::LongLong
}
}
b'j' => Length::IntMaxT,
b'z' => Length::SizeT,
b't' => Length::PtfDiffT,
b'L' => Length::LongDouble,
_ => return None,
})
});
if length.is_some() {
*rest = &rest[1..];
}
Some(match rest.get(0)? {
b'c' => Spec::Char {
width,
align_left: minus,
},
b's' => Spec::String {
width,
align_left: minus,
},
b'd' | b'i' => Spec::SignedInt {
width,
alignment: match (minus, zero) {
(true, _) => NumberAlignment::Left,
(false, true) => NumberAlignment::RightZero,
(false, false) => NumberAlignment::RightSpace,
},
positive_sign: match (plus, space) {
(true, _) => PositiveSign::Plus,
(false, true) => PositiveSign::Space,
(false, false) => PositiveSign::None,
},
},
c @ (b'u' | b'o' | b'x' | b'X') => {
let prefix = match hash {
false => Prefix::No,
true => Prefix::Yes,
};
let alignment = match (minus, zero) {
(true, _) => NumberAlignment::Left,
(false, true) => NumberAlignment::RightZero,
(false, false) => NumberAlignment::RightSpace,
};
let variant = match c {
b'u' => UnsignedIntVariant::Decimal,
b'o' => UnsignedIntVariant::Octal(prefix),
b'x' => UnsignedIntVariant::Hexadecimal(Case::Lowercase, prefix),
b'X' => UnsignedIntVariant::Hexadecimal(Case::Uppercase, prefix),
_ => unreachable!(),
};
Spec::UnsignedInt {
variant,
width,
alignment,
}
}
c @ (b'f' | b'F' | b'e' | b'E' | b'g' | b'G' | b'a' | b'A') => Spec::Float {
width,
precision,
variant: match c {
b'f' | b'F' => FloatVariant::Decimal,
b'e' | b'E' => FloatVariant::Scientific,
b'g' | b'G' => FloatVariant::Shortest,
b'a' | b'A' => FloatVariant::Hexadecimal,
_ => unreachable!(),
},
force_decimal: match hash {
false => ForceDecimal::No,
true => ForceDecimal::Yes,
},
case: match c.is_ascii_uppercase() {
false => Case::Lowercase,
true => Case::Uppercase,
},
alignment: match (minus, zero) {
(true, _) => NumberAlignment::Left,
(false, true) => NumberAlignment::RightZero,
(false, false) => NumberAlignment::RightSpace,
},
positive_sign: match (plus, space) {
(true, _) => PositiveSign::Plus,
(false, true) => PositiveSign::Space,
(false, false) => PositiveSign::None,
},
},
_ => return None,
})
}
pub fn write<'a>(
&self,
mut writer: impl Write,
mut args: impl Iterator<Item = FormatArgument>,
) -> Result<(), FormatError> {
match self {
&Spec::Char { width, align_left } => {
let width = resolve_asterisk(width, &mut args)?.unwrap_or(0);
let arg = next_arg(&mut args)?;
match arg {
FormatArgument::Char(c) => write_padded(writer, c, width, false, align_left),
_ => Err(FormatError::InvalidArgument(arg)),
}
}
&Spec::String { width, align_left } => {
let width = resolve_asterisk(width, &mut args)?.unwrap_or(0);
let arg = next_arg(&mut args)?;
match arg {
FormatArgument::String(s) => write_padded(writer, s, width, false, align_left),
_ => Err(FormatError::InvalidArgument(arg)),
}
}
&Spec::SignedInt {
width,
positive_sign,
alignment,
} => {
let width = resolve_asterisk(width, &mut args)?.unwrap_or(0);
let arg = next_arg(&mut args)?;
let FormatArgument::SignedInt(i) = arg else {
return Err(FormatError::InvalidArgument(arg));
};
if i >= 0 {
match positive_sign {
PositiveSign::None => Ok(()),
PositiveSign::Plus => write!(writer, "+"),
PositiveSign::Space => write!(writer, " "),
}
.map_err(FormatError::IoError)?;
}
match alignment {
NumberAlignment::Left => write!(writer, "{i:<width$}"),
NumberAlignment::RightSpace => write!(writer, "{i:>width$}"),
NumberAlignment::RightZero => write!(writer, "{i:0>width$}"),
}
.map_err(FormatError::IoError)
}
&Spec::UnsignedInt {
variant,
width,
alignment,
} => {
let width = resolve_asterisk(width, &mut args)?.unwrap_or(0);
let arg = next_arg(args)?;
let FormatArgument::SignedInt(i) = arg else {
return Err(FormatError::InvalidArgument(arg));
};
let s = match variant {
UnsignedIntVariant::Decimal => format!("{i}"),
UnsignedIntVariant::Octal(Prefix::No) => format!("{i:o}"),
UnsignedIntVariant::Octal(Prefix::Yes) => format!("{i:#o}"),
UnsignedIntVariant::Hexadecimal(Case::Lowercase, Prefix::No) => {
format!("{i:x}")
}
UnsignedIntVariant::Hexadecimal(Case::Lowercase, Prefix::Yes) => {
format!("{i:#x}")
}
UnsignedIntVariant::Hexadecimal(Case::Uppercase, Prefix::No) => {
format!("{i:X}")
}
UnsignedIntVariant::Hexadecimal(Case::Uppercase, Prefix::Yes) => {
format!("{i:#X}")
}
};
match alignment {
NumberAlignment::Left => write!(writer, "{s:<width$}"),
NumberAlignment::RightSpace => write!(writer, "{s:>width$}"),
NumberAlignment::RightZero => write!(writer, "{s:0>width$}"),
}
.map_err(FormatError::IoError)
}
&Spec::Float {
variant,
case,
force_decimal,
width,
positive_sign,
alignment,
precision,
} => {
let width = resolve_asterisk(width, &mut args)?.unwrap_or(0);
let precision = resolve_asterisk(precision, &mut args)?.unwrap_or(6);
let arg = next_arg(args)?;
let FormatArgument::Float(f) = arg else {
return Err(FormatError::InvalidArgument(arg));
};
match positive_sign {
PositiveSign::None => Ok(()),
PositiveSign::Plus => write!(writer, "+"),
PositiveSign::Space => write!(writer, " "),
}
.map_err(FormatError::IoError)?;
let s = match variant {
FloatVariant::Decimal => format_float_decimal(f, precision, case, force_decimal),
FloatVariant::Scientific => {
format_float_scientific(f, precision, case, force_decimal)
}
FloatVariant::Shortest => format_float_shortest(f, precision, case, force_decimal),
FloatVariant::Hexadecimal => todo!(),
};
match alignment {
NumberAlignment::Left => write!(writer, "{s:<width$}"),
NumberAlignment::RightSpace => write!(writer, "{s:>width$}"),
NumberAlignment::RightZero => write!(writer, "{s:0>width$}"),
}
.map_err(FormatError::IoError)
}
}
}
}
/// Formats `f` in fixed-point notation (`%f` / `%F`) with `precision` digits
/// after the decimal point.
///
/// Infinities and NaN are rendered as `inf` / `nan` for [`Case::Lowercase`]
/// and `INF` / `NAN` for [`Case::Uppercase`]. With a precision of zero the
/// decimal point is only printed when `force_decimal` is
/// [`ForceDecimal::Yes`].
fn format_float_decimal(
    f: f64,
    precision: usize,
    case: Case,
    force_decimal: ForceDecimal,
) -> String {
    if !f.is_finite() {
        // Rust spells these "inf", "-inf" and "NaN"; normalize to the
        // requested case.
        let mut s = format!("{f}");
        match case {
            Case::Lowercase => s.make_ascii_lowercase(),
            Case::Uppercase => s.make_ascii_uppercase(),
        }
        return s;
    }

    if precision == 0 && force_decimal == ForceDecimal::Yes {
        format!("{f:.0}.")
    } else {
        format!("{f:.precision$}")
    }
}
/// Formats `f` in scientific notation (`%e` / `%E`) with `precision` digits
/// after the decimal point and an exponent of at least two digits.
///
/// Infinities and NaN are rendered as `inf` / `nan` for [`Case::Lowercase`]
/// and `INF` / `NAN` for [`Case::Uppercase`]. With a precision of zero the
/// decimal point is only printed when `force_decimal` is
/// [`ForceDecimal::Yes`].
fn format_float_scientific(
    f: f64,
    precision: usize,
    case: Case,
    force_decimal: ForceDecimal,
) -> String {
    if !f.is_finite() {
        // Rust spells these "inf", "-inf" and "NaN"; normalize to the
        // requested case.
        let mut s = format!("{f}");
        match case {
            Case::Lowercase => s.make_ascii_lowercase(),
            Case::Uppercase => s.make_ascii_uppercase(),
        }
        return s;
    }

    // Let the standard library normalize and round the mantissa. Computing
    // the exponent via `log10` breaks for zero and negative numbers and
    // misses the case where rounding carries the mantissa up to 10.
    let rendered = format!("{:.*e}", precision, f);
    let Some((mantissa, exponent)) = rendered.split_once('e') else {
        return rendered;
    };
    let exponent: i32 = exponent.parse().unwrap_or(0);

    let additional_dot = if precision == 0 && ForceDecimal::Yes == force_decimal {
        "."
    } else {
        ""
    };
    let exp_char = match case {
        Case::Lowercase => 'e',
        Case::Uppercase => 'E',
    };

    format!("{mantissa}{additional_dot}{exp_char}{exponent:+03}")
}
// TODO: This could be optimized. It's not terribly important though.
/// Renders `f` both in fixed-point and in scientific notation and returns
/// whichever string is shorter; on a tie the fixed-point rendering wins.
// NOTE(review): C's `%g` chooses the notation based on the exponent and
// strips trailing zeros rather than comparing lengths; confirm whether
// this approximation is intended.
fn format_float_shortest(
    f: f64,
    precision: usize,
    case: Case,
    force_decimal: ForceDecimal,
) -> String {
    let decimal = format_float_decimal(f, precision, case, force_decimal);
    let scientific = format_float_scientific(f, precision, case, force_decimal);
    if scientific.len() < decimal.len() {
        scientific
    } else {
        decimal
    }
}
/// Turns an optional width or precision into a concrete number.
///
/// A fixed value is returned as is. For an asterisk the next argument is
/// consumed from `args`; it must be an unsigned integer that fits into a
/// `usize`, otherwise [`FormatError::InvalidArgument`] is returned.
fn resolve_asterisk(
    option: Option<CanAsterisk<usize>>,
    args: impl Iterator<Item = FormatArgument>,
) -> Result<Option<usize>, FormatError> {
    let Some(requested) = option else {
        return Ok(None);
    };

    match requested {
        CanAsterisk::Fixed(value) => Ok(Some(value)),
        CanAsterisk::Asterisk => {
            let arg = next_arg(args)?;
            if let FormatArgument::UnsignedInt(raw) = arg {
                if let Ok(value) = usize::try_from(raw) {
                    return Ok(Some(value));
                }
            }
            Err(FormatError::InvalidArgument(arg))
        }
    }
}
/// Takes the next argument, or fails with [`FormatError::NoMoreArguments`]
/// when the iterator is exhausted.
fn next_arg(
    mut arguments: impl Iterator<Item = FormatArgument>,
) -> Result<FormatArgument, FormatError> {
    match arguments.next() {
        Some(argument) => Ok(argument),
        None => Err(FormatError::NoMoreArguments),
    }
}
/// Writes `text` padded to at least `width` characters.
///
/// Padding goes on the right when `left` is set (spaces only); otherwise it
/// goes on the left, using zeros when `pad_zero` is set and spaces when not.
fn write_padded(
    mut writer: impl Write,
    text: impl Display,
    width: usize,
    pad_zero: bool,
    left: bool,
) -> Result<(), FormatError> {
    let outcome = if left {
        // 0 is ignored if we pad left.
        write!(writer, "{text: <width$}")
    } else if pad_zero {
        write!(writer, "{text:0>width$}")
    } else {
        write!(writer, "{text: >width$}")
    };
    outcome.map_err(FormatError::IoError)
}
/// Consumes either a `*` or a decimal number from the front of `rest`.
///
/// Returns `None` (leaving `rest` untouched) when `rest` starts with neither.
fn eat_asterisk_or_number(rest: &mut &[u8]) -> Option<CanAsterisk<usize>> {
    let input: &[u8] = *rest;
    match input.split_first() {
        Some((b'*', tail)) => {
            *rest = tail;
            Some(CanAsterisk::Asterisk)
        }
        _ => eat_number(rest).map(CanAsterisk::Fixed),
    }
}
/// Consumes a run of ASCII digits from the front of `rest` and parses it as
/// a decimal number.
///
/// Returns `None` and leaves `rest` untouched when `rest` does not start
/// with a digit or when the number does not fit into a `usize`. A run of
/// digits that extends to the end of the input is parsed as well.
fn eat_number(rest: &mut &[u8]) -> Option<usize> {
    let input: &[u8] = *rest;
    // If no non-digit is found, the whole remaining input is the number.
    let digit_count = input
        .iter()
        .position(|b| !b.is_ascii_digit())
        .unwrap_or(input.len());
    if digit_count == 0 {
        return None;
    }

    let (digits, tail) = input.split_at(digit_count);
    // ASCII digits are always valid UTF-8; the parse itself can still fail
    // on overflow, which is reported as `None` instead of panicking.
    let parsed = std::str::from_utf8(digits).ok()?.parse().ok()?;
    *rest = tail;
    Some(parsed)
}

View file

@ -1,175 +0,0 @@
//! Main entry point for our implementation of printf.
//!
//! The [`printf`] and [`sprintf`] closely match the behavior of the
//! corresponding C functions: the former renders a formatted string
//! to stdout, the latter renders to a new [`String`] object.
use crate::display::Quotable;
use crate::error::{UResult, USimpleError};
use crate::features::tokenize::sub::SubParser;
use crate::features::tokenize::token::Token;
use crate::features::tokenize::unescaped_text::UnescapedText;
use crate::show_warning;
use itertools::put_back_n;
use std::io::{stdout, Cursor, Write};
use std::iter::Peekable;
use std::slice::Iter;
/// Memo runner of printf.
///
/// Takes a format string and arguments, then:
/// 1. tokenizes the format string into tokens, consuming
///    any substitution arguments along the way;
/// 2. feeds the remaining arguments into the function
///    that prints the tokens.
struct Memo {
// The tokens parsed from the format string, in order of appearance.
tokens: Vec<Token>,
}
fn warn_excess_args(first_arg: &str) {
show_warning!(
"ignoring excess arguments, starting with {}",
first_arg.quote()
);
}
impl Memo {
    /// Tokenizes `pf_string`, alternating between literal text and
    /// substitutions until the format string is exhausted.
    ///
    /// If the format string contains no substitution at all, any supplied
    /// arguments are reported as excess and discarded.
    fn new<W>(
        writer: &mut W,
        pf_string: &str,
        pf_args_it: &mut Peekable<Iter<String>>,
    ) -> UResult<Self>
    where
        W: Write,
    {
        let mut tokens = Vec::new();
        let mut it = put_back_n(pf_string.chars());
        let mut has_sub = false;

        loop {
            if let Some(text) = UnescapedText::from_it_core(writer, &mut it, false) {
                tokens.push(text);
            }
            if let Some(sub) = SubParser::from_it(writer, &mut it, pf_args_it)? {
                has_sub = true;
                tokens.push(sub);
            }
            // Peek at the input by taking a character and putting it back.
            match it.next() {
                Some(c) => it.put_back(c),
                None => break,
            }
        }

        if !has_sub {
            if let Some(first_arg) = pf_args_it.peek() {
                warn_excess_args(first_arg);
                // drain remaining args
                while pf_args_it.next().is_some() {}
            }
        }

        Ok(Self { tokens })
    }

    /// Writes every token once, letting each take what it needs from
    /// `pf_args_it`.
    fn apply<W>(&self, writer: &mut W, pf_args_it: &mut Peekable<Iter<String>>)
    where
        W: Write,
    {
        for token in self.tokens.iter() {
            token.write(writer, pf_args_it);
        }
    }

    /// Parses `pf_string` and applies the resulting tokens for as long as
    /// arguments remain once parsing is done.
    fn run_all<W>(writer: &mut W, pf_string: &str, pf_args: &[String]) -> UResult<()>
    where
        W: Write,
    {
        let mut arg_it = pf_args.iter().peekable();
        let memo = Self::new(writer, pf_string, &mut arg_it)?;
        while arg_it.peek().is_some() {
            memo.apply(writer, &mut arg_it);
        }
        Ok(())
    }
}
/// Print a formatted string to stdout.
///
/// `format_string` is the template; `args` supplies the values that are
/// substituted into it.
///
/// See also [`sprintf`], which returns the result as a new [`String`].
///
/// # Examples
///
/// ```rust
/// use uucore::memo::printf;
///
/// printf("hello %s", &["world".to_string()]).unwrap();
/// // prints "hello world"
/// ```
pub fn printf(format_string: &str, args: &[String]) -> UResult<()> {
    Memo::run_all(&mut stdout(), format_string, args)
}
/// Build a new formatted string.
///
/// `format_string` is the template; `args` supplies the values that are
/// substituted into it. Fails if the rendered bytes are not valid UTF-8.
///
/// See also [`printf`], which prints to stdout.
///
/// # Examples
///
/// ```rust
/// use uucore::memo::sprintf;
///
/// let s = sprintf("hello %s", &["world".to_string()]).unwrap();
/// assert_eq!(s, "hello world".to_string());
/// ```
pub fn sprintf(format_string: &str, args: &[String]) -> UResult<String> {
    let mut writer = Cursor::new(vec![]);
    Memo::run_all(&mut writer, format_string, args)?;
    String::from_utf8(writer.into_inner()).map_err(|e| {
        USimpleError::new(
            1,
            format!("failed to parse formatted string as UTF-8: {e}"),
        )
    })
}
#[cfg(test)]
mod tests {
    use crate::memo::sprintf;

    /// An empty template renders to an empty string.
    #[test]
    fn test_sprintf_smoke() {
        let rendered = sprintf("", &[]).unwrap();
        assert_eq!(rendered, "");
    }

    /// Text without directives is passed through unchanged.
    #[test]
    fn test_sprintf_no_args() {
        let rendered = sprintf("hello world", &[]).unwrap();
        assert_eq!(rendered, "hello world");
    }

    /// `%s` is replaced by the string argument.
    #[test]
    fn test_sprintf_string() {
        let args = ["world".to_string()];
        let rendered = sprintf("hello %s", &args).unwrap();
        assert_eq!(rendered, "hello world");
    }
}

View file

@ -1,5 +0,0 @@
#[allow(clippy::module_inception)]
mod num_format;
pub mod sub;
pub mod token;
pub mod unescaped_text;

View file

@ -1,43 +0,0 @@
// spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety
//! Primitives used by Sub Tokenizer
//! and num_format modules
/// The kind of substitution a format field stands for.
// NOTE(review): the variant names presumably map onto the conversion
// families (string, float, C99 hex float, scientific, %g-style decimal,
// integer, char) - confirm against the sub tokenizer.
#[derive(Clone)]
pub enum FieldType {
Strf,
Floatf,
CninetyNineHexFloatf,
Scif,
Decf,
Intf,
Charf,
}
// #[allow(non_camel_case_types)]
// pub enum FChar {
// d,
// e,
// E,
// i,
// f,
// F,
// g,
// G,
// u,
// x,
// X,
// o
// }
//
/// The fields of a Sub token, stored as a single object so that they can
/// more simply be passed by reference to num_format in a Sub method.
#[derive(Clone)]
pub struct FormatField<'a> {
// presumably the minimum field width; TODO confirm sign convention
pub min_width: Option<isize>,
// The value after the '.', if any (the hex float formatter falls back to 6).
pub second_field: Option<u32>,
// The conversion character of the field (e.g. 'A' for uppercase hex float).
pub field_char: &'a char,
pub field_type: &'a FieldType,
// presumably the original text of the field; TODO confirm
pub orig: &'a String,
}

View file

@ -1,59 +0,0 @@
//! Primitives used by num_format and sub_modules.
//! never dealt with above (e.g. Sub Tokenizer never uses these)
use crate::{display::Quotable, show_error};
use itertools::{put_back_n, PutBackN};
use std::str::Chars;
use super::format_field::FormatField;
/// Contains the rough ingredients of the final output for a number,
/// organized together to allow for easy generalization of output
/// manipulation (e.g. max number of digits after the decimal point).
#[derive(Default)]
pub struct FormatPrimitive {
// Text in front of the digits, e.g. "-0x" or "0x" for hex floats.
pub prefix: Option<String>,
// presumably the digits before the decimal point; TODO confirm
pub pre_decimal: Option<String>,
// presumably the digits after the decimal point; TODO confirm
pub post_decimal: Option<String>,
// Text after the digits, e.g. the "p+0" exponent part of hex floats.
pub suffix: Option<String>,
}
/// A numeric base; each variant's discriminant is the radix itself.
#[derive(Clone, PartialEq, Eq)]
pub enum Base {
Ten = 10,
Hex = 16,
Octal = 8,
}
/// Information from the beginning of a numeric argument
/// that precedes the beginning of the numeric value.
pub struct InitialPrefix {
// The base the argument is written in.
pub radix_in: Base,
// -1 marks a negative number (see the hex float formatter).
pub sign: i8,
// Index into the argument string at which the numeric value starts.
pub offset: usize,
}
/// Two-step conversion of a textual numeric argument into its formatted
/// output: first into a [`FormatPrimitive`], then into the final string.
pub trait Formatter {
/// Return a `FormatPrimitive` for particular field char(s), given the
/// argument string and prefix information (sign, radix).
fn get_primitive(
&self,
field: &FormatField,
in_prefix: &InitialPrefix,
str_in: &str,
) -> Option<FormatPrimitive>;
/// Return a string from a `FormatPrimitive`, given information about
/// the field.
fn primitive_to_str(&self, prim: &FormatPrimitive, field: FormatField) -> String;
}
/// Returns a put-back iterator over the characters of `str_in`, starting at
/// byte index `offset`.
///
/// # Panics
///
/// Panics if `offset` is out of bounds or not on a character boundary.
pub fn get_it_at(offset: usize, str_in: &str) -> PutBackN<Chars> {
    let tail = &str_in[offset..];
    put_back_n(tail.chars())
}
// TODO: put this somewhere better
pub fn warn_incomplete_conv(pf_arg: &str) {
// important: keep println here not print
show_error!("{}: value not completely converted", pf_arg.maybe_quote());
}

View file

@ -1,270 +0,0 @@
// spell-checker:ignore (ToDO) arrnum arr_num mult basenum bufferval refd vals arrfloat conv intermed addl
/// Multiplies a number given as a slice of digits (most significant first)
/// in base `basenum` by the small integer `base_ten_int_fact`.
///
/// The result is again a vector of digits in base `basenum`, most
/// significant first. Leading zeros of the input are kept.
pub fn arrnum_int_mult(arr_num: &[u8], basenum: u8, base_ten_int_fact: u8) -> Vec<u8> {
    let factor = u16::from(base_ten_int_fact);
    let base = u16::from(basenum);

    // Work from the least significant digit, collecting digits in reverse.
    let mut carry: u16 = 0;
    let mut digits_rev: Vec<u8> = Vec::new();
    for &digit in arr_num.iter().rev() {
        let total = u16::from(digit) * factor + carry;
        digits_rev.push((total % base) as u8);
        carry = total / base;
    }

    // Whatever is left in the carry becomes additional leading digits.
    while carry != 0 {
        digits_rev.push((carry % base) as u8);
        carry /= base;
    }

    digits_rev.reverse();
    digits_rev
}
/// State carried between steps of the digit-wise long division performed by
/// `arrnum_int_div_step`.
#[allow(dead_code)]
pub struct Remainder<'a> {
// Index into `arr_num` at which the `replace` digits are positioned.
pub position: usize,
// Digits that stand in for the leading digits at `position`: the step
// function reads these first and then continues in `arr_num` at
// `position + replace.len()`.
pub replace: Vec<u8>,
// The digits of the number being divided.
pub arr_num: &'a Vec<u8>,
}
/// Result of one division step: the quotient accumulated in that step and
/// the state to continue from.
#[allow(dead_code)]
pub struct DivOut<'a> {
pub quotient: u8,
pub remainder: Remainder<'a>,
}
/// Performs one step of a long division of the number described by `rem_in`
/// (digits in base `radix_in`) by `base_ten_int_divisor`.
///
/// Digits are accumulated into a running value - first those in
/// `rem_in.replace`, then those of `rem_in.arr_num` that follow - until the
/// value exceeds the divisor. At that point the quotient is determined by
/// repeated subtraction and the rest is stored as the new `replace` digits.
///
/// When the digits run out, the step ends with a quotient of zero unless
/// `after_decimal` is set, in which case zeros are appended instead.
#[allow(dead_code)]
pub fn arrnum_int_div_step<'a>(
rem_in: &'a Remainder,
radix_in: u8,
base_ten_int_divisor: u8,
after_decimal: bool,
) -> DivOut<'a> {
let mut rem_out = Remainder {
position: rem_in.position,
replace: Vec::new(),
arr_num: rem_in.arr_num,
};
let mut bufferval: u16 = 0;
let base: u16 = u16::from(radix_in);
let divisor: u16 = u16::from(base_ten_int_divisor);
let mut traversed = 0;
let mut quotient = 0;
// The digits of the original number that come after the replaced ones.
let refd_vals = &rem_in.arr_num[rem_in.position + rem_in.replace.len()..];
let mut it_replace = rem_in.replace.iter();
let mut it_f = refd_vals.iter();
loop {
// Next digit: replacement digits first, then the original digits, then
// (only after the decimal point) an endless supply of zeros.
let u = match it_replace.next() {
Some(u_rep) => u16::from(*u_rep),
None => match it_f.next() {
Some(u_orig) => u16::from(*u_orig),
None => {
if !after_decimal {
break;
}
0
}
},
};
traversed += 1;
bufferval += u;
// NOTE(review): the outer test is `>` while the inner loop uses `>=`, so a
// running value exactly equal to the divisor is carried into the next
// digit instead of being divided here - confirm this is intended.
if bufferval > divisor {
while bufferval >= divisor {
quotient += 1;
bufferval -= divisor;
}
rem_out.replace = if bufferval == 0 {
Vec::new()
} else {
// The rest is turned into decimal digits and then converted to
// `radix_in`.
let remainder_as_arrnum = unsigned_to_arrnum(bufferval);
base_conv_vec(&remainder_as_arrnum, 10, radix_in)
};
// NOTE(review): this subtraction underflows if the new `replace` has
// more digits than were traversed - confirm that cannot happen.
rem_out.position += 1 + (traversed - rem_out.replace.len());
break;
} else {
// Shift the running value by one digit position.
bufferval *= base;
}
}
DivOut {
quotient,
remainder: rem_out,
}
}
/// Adds the small integer `base_ten_int_term` to a number given as a slice
/// of digits (most significant first) in base `basenum`.
///
/// The result is again a vector of digits in base `basenum`, most
/// significant first. Leading zeros of the input are kept.
pub fn arrnum_int_add(arrnum: &[u8], basenum: u8, base_ten_int_term: u8) -> Vec<u8> {
    let base = u16::from(basenum);

    // The term to add is treated as the initial carry into the lowest digit.
    let mut carry = u16::from(base_ten_int_term);
    let mut digits_rev: Vec<u8> = Vec::new();
    for &digit in arrnum.iter().rev() {
        let total = u16::from(digit) + carry;
        digits_rev.push((total % base) as u8);
        carry = total / base;
    }

    // Whatever is left in the carry becomes additional leading digits.
    while carry != 0 {
        digits_rev.push((carry % base) as u8);
        carry /= base;
    }

    digits_rev.reverse();
    digits_rev
}
/// Converts a number given as digits in base `radix_src` (most significant
/// first) into digits in base `radix_dest`.
///
/// Starting from a single zero digit, each source digit multiplies the
/// running result by the source radix and then adds the digit.
pub fn base_conv_vec(src: &[u8], radix_src: u8, radix_dest: u8) -> Vec<u8> {
    src.iter().fold(vec![0], |converted, &digit| {
        let shifted = arrnum_int_mult(&converted, radix_dest, radix_src);
        arrnum_int_add(&shifted, radix_dest, digit)
    })
}
/// Splits `src` into its decimal digits, most significant first.
///
/// Zero yields an empty vector.
#[allow(dead_code)]
pub fn unsigned_to_arrnum(src: u16) -> Vec<u8> {
    // Repeatedly drop the last decimal digit until nothing is left, taking
    // the last digit of each intermediate value.
    let mut digits: Vec<u8> = std::iter::successors(Some(src), |&rest| Some(rest / 10))
        .take_while(|&rest| rest > 0)
        .map(|rest| (rest % 10) as u8)
        .collect();
    digits.reverse();
    digits
}
// temporary needs-improvement-function
/// Interprets `src` as the digits after the radix point of a number in base
/// `radix_src` and returns the value as an `f64`.
///
/// Only the first 16 digits are taken into account; `_radix_dest` is unused.
pub fn base_conv_float(src: &[u8], radix_src: u8, _radix_dest: u8) -> f64 {
    // it would require a lot of addl code
    // to implement this for arbitrary string input.
    // until then, the below operates as an outline
    // of how it would work.
    let radix = f64::from(radix_src);
    let mut place_value = 1_f64;
    let mut total = 0_f64;
    for &digit in src.iter().take(16) {
        place_value /= radix;
        total += place_value * f64::from(digit);
    }
    total
}
/// Converts the characters of `src` into digit values using
/// `radix_def_src`.
///
/// Characters that are not digits in that radix are silently skipped.
pub fn str_to_arrnum(src: &str, radix_def_src: &dyn RadixDef) -> Vec<u8> {
    //todo err msg on incorrect
    src.chars()
        .filter_map(|c| radix_def_src.parse_char(c))
        .collect()
}
/// Renders the digit values in `src` as characters using `radix_def_dest`.
///
/// Values that have no character in that radix are silently skipped.
pub fn arrnum_to_str(src: &[u8], radix_def_dest: &dyn RadixDef) -> String {
    //todo
    src.iter()
        .filter_map(|&digit| radix_def_dest.format_u8(digit))
        .collect()
}
/// Converts the textual number `src` from the radix described by
/// `radix_def_src` to the one described by `radix_def_dest`.
pub fn base_conv_str(
    src: &str,
    radix_def_src: &dyn RadixDef,
    radix_def_dest: &dyn RadixDef,
) -> String {
    let radix_src = radix_def_src.get_max();
    let radix_dest = radix_def_dest.get_max();

    let source_digits: Vec<u8> = str_to_arrnum(src, radix_def_src);
    let converted = base_conv_vec(&source_digits, radix_src, radix_dest);
    arrnum_to_str(&converted, radix_def_dest)
}
/// Describes a radix: its size and the mapping between digit characters and
/// digit values.
pub trait RadixDef {
/// The radix itself (e.g. 10 or 16).
fn get_max(&self) -> u8;
/// The value of the digit character `x`, or `None` if `x` is not a digit
/// in this radix.
fn parse_char(&self, x: char) -> Option<u8>;
/// The character for the digit value `x`, or `None` if `x` is not a valid
/// digit value in this radix.
fn format_u8(&self, x: u8) -> Option<char>;
}
/// The decimal radix.
pub struct RadixTen;

const ZERO_ASC: u8 = b'0';
const UPPER_A_ASC: u8 = b'A';
const LOWER_A_ASC: u8 = b'a';

impl RadixDef for RadixTen {
    fn get_max(&self) -> u8 {
        10
    }

    /// Maps `'0'..='9'` to `0..=9`; everything else is rejected.
    fn parse_char(&self, c: char) -> Option<u8> {
        if c.is_ascii_digit() {
            Some(c as u8 - ZERO_ASC)
        } else {
            None
        }
    }

    /// Maps `0..=9` to `'0'..='9'`; everything else is rejected.
    fn format_u8(&self, u: u8) -> Option<char> {
        if u <= 9 {
            Some((ZERO_ASC + u) as char)
        } else {
            None
        }
    }
}
/// The hexadecimal radix.
pub struct RadixHex;

impl RadixDef for RadixHex {
    fn get_max(&self) -> u8 {
        16
    }

    /// Accepts decimal digits and the letters a-f in either case.
    fn parse_char(&self, c: char) -> Option<u8> {
        // Amount to subtract from the character code to get the digit value.
        let code_offset = match c {
            '0'..='9' => ZERO_ASC,
            'A'..='F' => UPPER_A_ASC - 10,
            'a'..='f' => LOWER_A_ASC - 10,
            _ => return None,
        };
        Some(c as u8 - code_offset)
    }

    /// Renders digit values `0..=15`, using uppercase letters above 9.
    fn format_u8(&self, u: u8) -> Option<char> {
        if u <= 9 {
            Some((ZERO_ASC + u) as char)
        } else if u <= 15 {
            Some((UPPER_A_ASC + (u - 10)) as char)
        } else {
            None
        }
    }
}
mod tests;

View file

@ -1,56 +0,0 @@
// spell-checker:ignore (ToDO) arrnum mult
#[cfg(test)]
use super::*;
#[test]
fn test_arrnum_int_mult() {
    // (in base 10) 12 * 4 = 48
    let product = arrnum_int_mult(&[1, 2], 10, 4);
    let expected: Vec<u8> = vec![4, 8];
    assert_eq!(product, expected);
}
#[test]
fn test_arrnum_int_non_base_10() {
    // (in base 3)
    // 5 * 4 = 20
    let product = arrnum_int_mult(&[1, 2], 3, 4);
    let expected: Vec<u8> = vec![2, 0, 2];
    assert_eq!(product, expected);
}
#[test]
fn test_arrnum_int_div_short_circuit() {
    let digits: Vec<u8> = vec![5, 5, 5, 5, 0];
    let start = Remainder {
        position: 1,
        replace: vec![1, 3],
        arr_num: &digits,
    };

    // the "replace" should mean the number being divided
    // is 1350, the first time you can get 41 to go into
    // 1350, its at 135, where you can get a quotient of
    // 3 and a remainder of 12;
    let step = arrnum_int_div_step(&start, 10, 41, false);

    assert_eq!(step.quotient, 3);
    assert_eq!(step.remainder.position, 3);
    assert_eq!(step.remainder.replace, vec![1, 2]);
}

View file

@ -1,115 +0,0 @@
// spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety
// spell-checker:ignore (ToDO) arrnum
//! formatter for %a %F C99 Hex-floating-point subs
use super::super::format_field::FormatField;
use super::super::formatter::{FormatPrimitive, Formatter, InitialPrefix};
use super::base_conv;
use super::base_conv::RadixDef;
use super::float_common::{primitive_to_str_common, FloatAnalysis};
/// Formatter for C99 hexadecimal floating point substitutions.
#[derive(Default)]
pub struct CninetyNineHexFloatf {
// Not read anywhere in this file; kept behind `allow(dead_code)`.
#[allow(dead_code)]
as_num: f64,
}
impl CninetyNineHexFloatf {
/// Creates a formatter with default state.
pub fn new() -> Self {
Self::default()
}
}
impl Formatter for CninetyNineHexFloatf {
    /// Analyzes `str_in` and builds the hex float primitive for it.
    ///
    /// The number of places passed on is the field's second field
    /// (defaulting to 6) plus one.
    fn get_primitive(
        &self,
        field: &FormatField,
        initial_prefix: &InitialPrefix,
        str_in: &str,
    ) -> Option<FormatPrimitive> {
        let places = (field.second_field.unwrap_or(6) + 1) as usize;
        let capitalized = *field.field_char == 'A';

        let analysis = FloatAnalysis::analyze(str_in, initial_prefix, Some(places), None, true);
        let unprefixed = &str_in[initial_prefix.offset..];

        Some(get_primitive_hex(
            initial_prefix,
            unprefixed,
            &analysis,
            places,
            capitalized,
        ))
    }

    /// Renders the primitive with the formatting shared by all float
    /// formatters.
    fn primitive_to_str(&self, prim: &FormatPrimitive, field: FormatField) -> String {
        primitive_to_str_common(prim, &field)
    }
}
// c99 hex has unique requirements of all floating point subs in pretty much every part of building a primitive, from prefix and suffix to need for base conversion (in all other cases if you don't have decimal you must have decimal, here it's the other way around)
// on the todo list is to have a trait for get_primitive that is implemented by each float formatter and can override a default. when that happens we can take the parts of get_primitive_dec specific to dec and spin them out to their own functions that can be overridden.
/// Builds the primitive for a hex float.
///
/// Only the prefix (`0x`, or `-0x` for negative input) and the exponent
/// suffix are produced so far; the digits are left unset and the exponent is
/// always zero.
fn get_primitive_hex(
    initial_prefix: &InitialPrefix,
    _str_in: &str,
    _analysis: &FloatAnalysis,
    _last_dec_place: usize,
    capitalized: bool,
) -> FormatPrimitive {
    let prefix = if initial_prefix.sign == -1 {
        "-0x"
    } else {
        "0x"
    };

    // TODO actual conversion, make sure to get back mantissa.
    // for hex to hex, it's really just a matter of moving the
    // decimal point and calculating the mantissa by its initial
    // position and its moves, with every position counting for
    // the addition or subtraction of 4 (2**4, because 4 bits in a hex digit)
    // to the exponent.
    // decimal's going to be a little more complicated. correct simulation
    // of glibc will require after-decimal division to a specified precision.
    // the difficult part of this (arrnum_int_div_step) is already implemented.

    // the hex float name may be a bit misleading in terms of how to go about the
    // conversion. The best way to do it is to just convert the float number
    // directly to base 2 and then at the end translate back to hex.
    let mantissa = 0;
    let ind = if capitalized { "P" } else { "p" };
    let sign = if mantissa >= 0 { "+" } else { "" };

    FormatPrimitive {
        prefix: Some(String::from(prefix)),
        suffix: Some(format!("{ind}{sign}{mantissa}")),
        ..Default::default()
    }
}
// Converts a run of decimal digits to hexadecimal digits.
// `before_decimal` selects the integer conversion; otherwise `src` is
// converted with the fractional conversion and the first two characters of
// the rendered result are dropped (presumably a leading "0." — confirm
// against base_conv_float). Results of two characters or fewer are returned
// unchanged.
#[allow(dead_code)]
fn to_hex(src: &str, before_decimal: bool) -> String {
    let radix_ten = base_conv::RadixTen;
    let radix_hex = base_conv::RadixHex;
    if before_decimal {
        return base_conv::base_conv_str(src, &radix_ten, &radix_hex);
    }

    let as_arrnum_ten = base_conv::str_to_arrnum(src, &radix_ten);
    let fraction =
        base_conv::base_conv_float(&as_arrnum_ten, radix_ten.get_max(), radix_hex.get_max());
    let rendered = format!("{}", fraction);
    if rendered.len() <= 2 {
        // zero
        return rendered;
    }
    String::from(&rendered[2..])
}

View file

@ -1,185 +0,0 @@
// spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety
//! formatter for %g %G decimal subs
use super::super::format_field::FormatField;
use super::super::formatter::{FormatPrimitive, Formatter, InitialPrefix};
use super::float_common::{get_primitive_dec, primitive_to_str_common, FloatAnalysis};
// Total budget of significant digits that round() keeps across the
// pre-decimal and post-decimal parts of a number.
const SIGNIFICANT_FIGURES: usize = 6;
// Parse the leading digits of a numeric string, rounded (half up) to a given
// number of significant figures. The result is NOT rescaled: digits past the
// requested significance are dropped, not replaced by zeroes.
// This is a helper function for round().
// Examples:
// round_to_significance("456", 1) == 5
// round_to_significance("456", 2) == 46
// round_to_significance("456", 9) == 456
// Input that is not a plain run of ASCII digits yields 0 instead of
// panicking. When rounding is needed and the rounded value does not fit in a
// u32, the result saturates to u32::MAX.
fn round_to_significance(input: &str, significant_figures: usize) -> u32 {
    if significant_figures >= input.len() {
        // Nothing has to be dropped, so no rounding is required.
        return input.parse::<u32>().unwrap_or(0);
    }

    // Keep one guard digit past the requested significance; it decides the
    // rounding direction. `get` avoids a panic on a non-character boundary.
    let digits = match input.get(..=significant_figures) {
        Some(digits) if digits.bytes().all(|b| b.is_ascii_digit()) => digits,
        _ => return 0,
    };

    // Integer arithmetic keeps every digit exact (a float intermediary loses
    // precision once the digit count exceeds the float's mantissa).
    match digits.parse::<u64>() {
        Ok(value) => {
            let rounded = value / 10 + u64::from(value % 10 >= 5);
            rounded.min(u64::from(u32::MAX)) as u32
        }
        // Only digits were accepted above, so the sole failure is overflow.
        Err(_) => u32::MAX,
    }
}
// Removing trailing zeroes, expressing the result as an integer where
// possible. This is a helper function for round().
fn truncate(mut format: FormatPrimitive) -> FormatPrimitive {
if let Some(ref post_dec) = format.post_decimal {
let trimmed = post_dec.trim_end_matches('0');
if trimmed.is_empty() {
// If there are no nonzero digits after the decimal point,
// use integer formatting by clearing post_decimal and suffix.
format.post_decimal = Some(String::new());
if format.suffix == Some("e+00".into()) {
format.suffix = Some(String::new());
}
} else if trimmed.len() != post_dec.len() {
// Otherwise, update the format to remove only the trailing
// zeroes (e.g. "4.50" becomes "4.5", not "4"). If there were
// no trailing zeroes, do nothing.
format.post_decimal = Some(trimmed.to_owned());
}
}
format
}
// Round a format to six significant figures and remove trailing zeroes.
// Significant digits are taken from pre_decimal first; whatever is left of
// the SIGNIFICANT_FIGURES budget is then applied to post_decimal. The result
// is finally passed through truncate().
// NOTE(review): `significant_digits_remaining -= rounded_str.len()` would
// underflow if the rounded pre_decimal had more digits than the budget —
// confirm callers only pass values for which that cannot happen.
fn round(mut format: FormatPrimitive) -> FormatPrimitive {
let mut significant_digits_remaining = SIGNIFICANT_FIGURES;
// First, take as many significant digits as possible from pre_decimal,
if format.pre_decimal.is_some() {
let input = format.pre_decimal.as_ref().unwrap();
let rounded = round_to_significance(input, significant_digits_remaining);
let mut rounded_str = rounded.to_string();
significant_digits_remaining -= rounded_str.len();
// If the pre_decimal has exactly enough significant digits,
// round the input to the nearest integer. If the first
// post_decimal digit is 5 or higher, round up by incrementing
// the pre_decimal number. Otherwise, use the pre_decimal as-is.
if significant_digits_remaining == 0 {
if let Some(digits) = &format.post_decimal {
// a missing first digit is treated as '0', i.e. no rounding up
if digits.chars().next().unwrap_or('0') >= '5' {
let rounded = rounded + 1;
rounded_str = rounded.to_string();
}
}
}
format.pre_decimal = Some(rounded_str);
}
// If no significant digits remain, or there's no post_decimal to
// round, return the rounded pre_decimal value with no post_decimal.
// Otherwise, round the post_decimal to the remaining significance.
if significant_digits_remaining == 0 {
format.post_decimal = Some(String::new());
} else if let Some(input) = format.post_decimal {
// split the post_decimal into its run of leading zeroes and the rest
let leading_zeroes = input.len() - input.trim_start_matches('0').len();
let digits = &input[leading_zeroes..];
// In the post_decimal, leading zeroes are significant. "01.0010"
// has one significant digit in pre_decimal, and 3 from post_decimal.
let mut post_decimal_str = String::with_capacity(significant_digits_remaining);
for _ in 0..leading_zeroes {
post_decimal_str.push('0');
}
if leading_zeroes < significant_digits_remaining {
// After significant leading zeroes, round the remaining digits
// to any remaining significance.
let rounded = round_to_significance(digits, significant_digits_remaining);
post_decimal_str.push_str(&rounded.to_string());
} else if leading_zeroes == significant_digits_remaining
&& digits.chars().next().unwrap_or('0') >= '5'
{
// If necessary, round up the post_decimal ("1.000009" should
// round to 1.00001, instead of truncating after the last
// significant leading zero).
post_decimal_str.pop();
post_decimal_str.push('1');
} else {
// If the rounded post_decimal is entirely zeroes, discard
// it and use integer formatting instead.
post_decimal_str = String::new();
}
format.post_decimal = Some(post_decimal_str);
}
truncate(format)
}
// Given an exponent suffix used in scientific notation (e.g. "e+05" or
// "E-03"), return whether the number is small enough to be expressed as a
// decimal instead. "Small enough" is based only on the number's magnitude,
// not the length of any string representation: exponents from -4 up to and
// including +5 qualify. A missing suffix always qualifies.
// The exponent is compared numerically, so the capitalized "E" marker
// produced for %G is handled the same way as the lowercase one.
fn should_represent_as_decimal(suffix: &Option<String>) -> bool {
    let exponent = match suffix {
        Some(exponent) => exponent,
        None => return true,
    };

    // Drop the e/E marker and read the signed exponent that follows it.
    let power = exponent
        .strip_prefix(|c: char| c == 'e' || c == 'E')
        .and_then(|digits| digits.parse::<i32>().ok());

    match power {
        Some(power) => (-4..=5).contains(&power),
        // Not of the form e±NN: keep the historical lexicographic comparison.
        None => {
            if exponent.chars().nth(1) == Some('-') {
                exponent.as_str() < "e-05"
            } else {
                exponent.as_str() < "e+06"
            }
        }
    }
}
// Stateless formatter for the %g / %G substitutions.
pub struct Decf;

impl Decf {
    // Creates the formatter; it carries no data.
    pub fn new() -> Self {
        Decf
    }
}
impl Formatter for Decf {
    // Builds the primitive for a %g / %G substitution.
    // The argument is first interpreted in scientific notation, so that input
    // values which would be displayed that way are not truncated based on
    // their relation to the decimal place. When the resulting exponent is
    // small, the primitive is rebuilt in plain decimal notation instead.
    // Either way the result is rounded with round(). Always returns `Some`.
    fn get_primitive(
        &self,
        field: &FormatField,
        initial_prefix: &InitialPrefix,
        str_in: &str,
    ) -> Option<FormatPrimitive> {
        let last_dec_place = (field.second_field.unwrap_or(6) + 1) as usize;
        let analysis = FloatAnalysis::analyze(
            str_in,
            initial_prefix,
            Some(last_dec_place + 1),
            None,
            false,
        );
        let unprefixed = &str_in[initial_prefix.offset..];
        let capitalized = *field.field_char == 'G';

        let scientific = get_primitive_dec(
            initial_prefix,
            unprefixed,
            &analysis,
            last_dec_place,
            Some(capitalized),
        );
        let chosen = if should_represent_as_decimal(&scientific.suffix) {
            // Use decimal formatting instead of scientific notation
            // if the input's magnitude is small.
            get_primitive_dec(initial_prefix, unprefixed, &analysis, last_dec_place, None)
        } else {
            scientific
        };
        Some(round(chosen))
    }

    // Renders the primitive with the shared float renderer.
    fn primitive_to_str(&self, prim: &FormatPrimitive, field: FormatField) -> String {
        primitive_to_str_common(prim, &field)
    }
}

View file

@ -1,377 +0,0 @@
// spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety
// spell-checker:ignore (ToDO) arrnum
use super::super::format_field::FormatField;
use super::super::formatter::{
get_it_at, warn_incomplete_conv, Base, FormatPrimitive, InitialPrefix,
};
use super::base_conv;
use super::base_conv::RadixDef;
// if the memory, copy, and comparison cost of chars
// becomes an issue, we can always operate in vec<u8> here
// rather than just at de_hex
// Result of scanning the digits of a float argument; produced by
// FloatAnalysis::analyze.
pub struct FloatAnalysis {
// number of characters analyze() stepped over before it stopped scanning
pub len_important: usize,
// none means no decimal point.
// otherwise the position of the '.' as counted by analyze()'s scan
pub decimal_pos: Option<usize>,
// the digit found at scan position `max_sd`, when a significant-digit
// limit was given and that position was reached
pub follow: Option<char>,
}
// Decides whether the scan has passed enough digits to satisfy `limit`.
// `string_position` is the current scan position and `starting_position` the
// position counting starts from; one position is held back for rounding.
// - decimal input, hex output: never (undecidable without converting)
// - hex input, decimal output: the digit count is scaled by 9/8 first
// - same base on both sides: the digit count is compared directly
fn has_enough_digits(
    hex_input: bool,
    hex_output: bool,
    string_position: usize,
    starting_position: usize,
    limit: usize,
) -> bool {
    match (hex_input, hex_output) {
        // undecidable without converting
        (false, true) => false,
        (true, false) => {
            // the -1 is for rounding
            let digits_seen = (string_position - 1) - starting_position;
            (digits_seen * 9) / 8 >= limit
        }
        // the -1 is for rounding
        _ => (string_position - 1) - starting_position >= limit,
    }
}
impl FloatAnalysis {
// Scans the digits of `str_in`, starting at `initial_prefix.offset`, and
// records how many characters matter, where the decimal point is, and which
// digit follows the last significant one.
// - max_sd_opt: optional limit on significant digits
// - max_after_dec_opt: optional limit on digits after the decimal point;
//   when present it takes precedence over max_sd_opt for ending the scan
// - hex_output: whether the caller will render the number in hex
// Scanning stops early, after calling warn_incomplete_conv, on a second
// decimal point, on any non-digit character, and on a hex letter in
// non-hex input.
// Panics if `initial_prefix.radix_in` is octal.
#[allow(clippy::cognitive_complexity)]
pub fn analyze(
str_in: &str,
initial_prefix: &InitialPrefix,
max_sd_opt: Option<usize>,
max_after_dec_opt: Option<usize>,
hex_output: bool,
) -> Self {
// this fn assumes
// the input string
// has no leading spaces or 0s
let str_it = get_it_at(initial_prefix.offset, str_in);
let mut ret = Self {
len_important: 0,
decimal_pos: None,
follow: None,
};
let hex_input = match initial_prefix.radix_in {
Base::Hex => true,
Base::Ten => false,
Base::Octal => {
panic!("this should never happen: floats should never receive octal input");
}
};
// i counts the characters consumed so far (the decimal point included)
let mut i = 0;
// position just before the first nonzero digit seen after the decimal point
let mut pos_before_first_nonzero_after_decimal: Option<usize> = None;
for c in str_it {
match c {
e @ ('0'..='9' | 'A'..='F' | 'a'..='f') => {
if !hex_input {
// hex letters are only digits when the input is hex
match e {
'0'..='9' => {}
_ => {
warn_incomplete_conv(str_in);
break;
}
}
}
if ret.decimal_pos.is_some()
&& pos_before_first_nonzero_after_decimal.is_none()
&& e != '0'
{
pos_before_first_nonzero_after_decimal = Some(i - 1);
}
if let Some(max_sd) = max_sd_opt {
if i == max_sd {
// follow is used in cases of %g
// where the character right after the last
// sd is considered is rounded affecting
// the previous digit in 1/2 of instances
ret.follow = Some(e);
} else if ret.decimal_pos.is_some() && i > max_sd {
break;
}
}
if let Some(max_after_dec) = max_after_dec_opt {
if let Some(p) = ret.decimal_pos {
if has_enough_digits(hex_input, hex_output, i, p, max_after_dec) {
break;
}
}
} else if let Some(max_sd) = max_sd_opt {
if let Some(p) = pos_before_first_nonzero_after_decimal {
if has_enough_digits(hex_input, hex_output, i, p, max_sd) {
break;
}
}
}
}
'.' => {
// only the first decimal point is accepted
if ret.decimal_pos.is_none() {
ret.decimal_pos = Some(i);
} else {
warn_incomplete_conv(str_in);
break;
}
}
_ => {
warn_incomplete_conv(str_in);
break;
}
};
i += 1;
}
// note: a `break` above skips the increment for the character that caused it
ret.len_important = i;
ret
}
}
// Converts a run of hexadecimal digits to decimal digits.
// `before_decimal` selects the integer conversion; otherwise `src` is
// converted with the fractional conversion and the first two characters of
// the rendered result are dropped (presumably a leading "0." — confirm
// against base_conv_float). Results of two characters or fewer are returned
// unchanged.
fn de_hex(src: &str, before_decimal: bool) -> String {
    let radix_ten = base_conv::RadixTen;
    let radix_hex = base_conv::RadixHex;
    if before_decimal {
        return base_conv::base_conv_str(src, &radix_hex, &radix_ten);
    }

    let as_arrnum_hex = base_conv::str_to_arrnum(src, &radix_hex);
    let fraction =
        base_conv::base_conv_float(&as_arrnum_hex, radix_hex.get_max(), radix_ten.get_max());
    let rendered = format!("{}", fraction);
    if rendered.len() <= 2 {
        // zero
        return rendered;
    }
    String::from(&rendered[2..])
}
// Truncates `in_str` to its first `position` bytes and bumps the last kept
// digit up by one, carrying through any trailing nines (each becomes 0).
// When `before_dec` is set and the carry runs through the most significant
// digit, that digit becomes 1 instead of 0 (the string does not grow; the
// caller has to account for the new decimal place).
// Returns the rounded string and whether the carry was absorbed by a digit
// other than nine (`false` means it ran off the front of the string).
fn _round_str_from(in_str: &str, position: usize, before_dec: bool) -> (String, bool) {
    let head = &in_str[0..position];
    // Replacement characters, collected from the least significant end.
    let mut replaced: Vec<char> = Vec::new();
    let mut untouched_len = position;
    let mut absorbed = false;

    for digit in head.chars().rev() {
        untouched_len -= 1;
        if digit == '9' {
            // If we're before the decimal and on the most significant digit,
            // round 9 to 1, else to 0.
            let most_significant = before_dec && untouched_len == 0;
            replaced.push(if most_significant { '1' } else { '0' });
        } else {
            replaced.push(((digit as u8) + 1) as char);
            absorbed = true;
            break;
        }
    }

    let mut rounded = String::from(&in_str[0..untouched_len]);
    rounded.extend(replaced.into_iter().rev());
    (rounded, absorbed)
}
// Rounds a number, given as its digits before and after the decimal point,
// at index `position` of the post-decimal digits.
// When the digit at `position` is 5 or higher, the post-decimal digits are
// cut at `position` and rounded up; a carry that runs off their front is
// propagated into the pre-decimal digits. In every other case both strings
// are returned unchanged.
// The returned flag is true when the pre-decimal digits rolled over to a new
// decimal place (e.g. "99" became "10"), which the caller must compensate
// for; it is never set when the pre-decimal part was just "0".
fn round_terminal_digit(
    before_dec: String,
    after_dec: String,
    position: usize,
) -> (String, String, bool) {
    if position >= after_dec.len() {
        return (before_dec, after_dec, false);
    }

    let digit_at_pos = after_dec[position..=position].chars().next().expect("");
    if !matches!(digit_at_pos, '5'..='9') {
        return (before_dec, after_dec, false);
    }

    let (new_after_dec, finished_in_dec) = _round_str_from(&after_dec, position, false);
    if finished_in_dec {
        return (before_dec, new_after_dec, false);
    }

    // The carry ran through every post-decimal digit: bump the integer part.
    let (new_before_dec, _) = _round_str_from(&before_dec, before_dec.len(), true);
    // If the first digit is a one and the remaining are zeros, we have
    // rounded to a new decimal place, so the decimal place must be updated.
    let rolled_over = {
        let mut digits = new_before_dec.chars();
        digits.next() == Some('1') && digits.all(|c| c == '0')
    };
    // Only update decimal place if the before decimal != 0
    let dec_place_chg = rolled_over && before_dec != "0";
    (new_before_dec, new_after_dec, dec_place_chg)
}
// Builds the FormatPrimitive for a decimal float substitution.
// - str_in: the argument with its initial prefix already sliced off
// - analysis: result of FloatAnalysis::analyze for the same argument
// - last_dec_place: position (1-based) of the post-decimal digit at which
//   rounding happens; visible callers pass `second_field + 1`
// - sci_mode: None for plain decimal notation; Some(capitalized) for
//   scientific notation with an 'E' (true) or 'e' (false) exponent marker
// NOTE(review): `last_dec_place - 1` underflows for 0 — confirm no caller
// passes 0.
// NOTE(review): the post-decimal slice `&str_in[pos + 1..]` runs to the end
// of str_in rather than stopping at `analysis.len_important` — confirm that
// trailing non-digit characters cannot reach this point.
#[allow(clippy::cognitive_complexity)]
pub fn get_primitive_dec(
initial_prefix: &InitialPrefix,
str_in: &str,
analysis: &FloatAnalysis,
last_dec_place: usize,
sci_mode: Option<bool>,
) -> FormatPrimitive {
let mut f = FormatPrimitive::default();
// add negative sign section
if initial_prefix.sign == -1 {
f.prefix = Some(String::from("-"));
}
// assign the digits before and after the decimal points
// to separate slices. If no digits after decimal point,
// assign 0
let (mut first_segment_raw, second_segment_raw) = match analysis.decimal_pos {
Some(pos) => (&str_in[..pos], &str_in[pos + 1..]),
None => (str_in, "0"),
};
// an argument such as ".5" has no digits before the decimal point
if first_segment_raw.is_empty() {
first_segment_raw = "0";
}
// convert to string, de_hexifying if input is in hex // spell-checker:disable-line
let (first_segment, second_segment) = match initial_prefix.radix_in {
Base::Hex => (
de_hex(first_segment_raw, true),
de_hex(second_segment_raw, false),
),
_ => (
String::from(first_segment_raw),
String::from(second_segment_raw),
),
};
// in scientific mode, normalize to a single digit before the decimal point.
// `mantissa` holds the power of ten that is printed in the suffix.
let (pre_dec_unrounded, post_dec_unrounded, mut mantissa) = if sci_mode.is_some() {
if first_segment.len() > 1 {
// several integer digits: all but the first move behind the decimal point
let mut post_dec = String::from(&first_segment[1..]);
post_dec.push_str(&second_segment);
(
String::from(&first_segment[0..1]),
post_dec,
first_segment.len() as isize - 1,
)
} else {
match first_segment
.chars()
.next()
.expect("float_common: no chars in first segment.")
{
'0' => {
// value below one: the first nonzero post-decimal digit becomes
// the leading digit; if there is none the result stays 0.0
let it = second_segment.chars().enumerate();
let mut m: isize = 0;
let mut pre = String::from("0");
let mut post = String::from("0");
for (i, c) in it {
match c {
'0' => {}
_ => {
m = -((i as isize) + 1);
pre = String::from(&second_segment[i..=i]);
post = String::from(&second_segment[i + 1..]);
break;
}
}
}
(pre, post, m)
}
// a single nonzero integer digit is already normalized
_ => (first_segment, second_segment, 0),
}
}
} else {
(first_segment, second_segment, 0)
};
let (pre_dec_draft, post_dec_draft, dec_place_chg) =
round_terminal_digit(pre_dec_unrounded, post_dec_unrounded, last_dec_place - 1);
f.post_decimal = Some(post_dec_draft);
if let Some(capitalized) = sci_mode {
let si_ind = if capitalized { 'E' } else { 'e' };
// Increase the mantissa if we're adding a decimal place
if dec_place_chg {
mantissa += 1;
}
f.suffix = Some(if mantissa >= 0 {
format!("{si_ind}+{mantissa:02}")
} else {
// negative sign is considered in format!s
// leading zeroes
format!("{si_ind}{mantissa:03}")
});
f.pre_decimal = Some(pre_dec_draft);
} else if dec_place_chg {
// We've rounded up to a new decimal place so append 0
f.pre_decimal = Some(pre_dec_draft + "0");
} else {
f.pre_decimal = Some(pre_dec_draft);
}
f
}
pub fn primitive_to_str_common(prim: &FormatPrimitive, field: &FormatField) -> String {
let mut final_str = String::new();
if let Some(ref prefix) = prim.prefix {
final_str.push_str(prefix);
}
match prim.pre_decimal {
Some(ref pre_decimal) => {
final_str.push_str(pre_decimal);
}
None => {
panic!(
"error, format primitives provided to int, will, incidentally under correct \
behavior, always have a pre_dec value."
);
}
}
let decimal_places = field.second_field.unwrap_or(6);
match prim.post_decimal {
Some(ref post_decimal) => {
if !post_decimal.is_empty() && decimal_places > 0 {
final_str.push('.');
let len_avail = post_decimal.len() as u32;
if decimal_places >= len_avail {
// println!("dec {}, len avail {}", decimal_places, len_avail);
final_str.push_str(post_decimal);
if *field.field_char != 'g' && *field.field_char != 'G' {
let diff = decimal_places - len_avail;
for _ in 0..diff {
final_str.push('0');
}
}
} else {
// println!("printing to only {}", decimal_places);
final_str.push_str(&post_decimal[0..decimal_places as usize]);
}
}
}
None => {
panic!(
"error, format primitives provided to int, will, incidentally under correct \
behavior, always have a pre_dec value."
);
}
}
if let Some(ref suffix) = prim.suffix {
final_str.push_str(suffix);
}
final_str
}

View file

@ -1,43 +0,0 @@
// spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety
// spell-checker:ignore (ToDO) arrnum
//! formatter for %f %F common-notation floating-point subs
use super::super::format_field::FormatField;
use super::super::formatter::{FormatPrimitive, Formatter, InitialPrefix};
use super::float_common::{get_primitive_dec, primitive_to_str_common, FloatAnalysis};
// Stateless formatter for the %f / %F substitutions.
#[derive(Default)]
pub struct Floatf;

impl Floatf {
    // Creates the formatter; it carries no data.
    pub fn new() -> Self {
        Floatf
    }
}
impl Formatter for Floatf {
    // Builds the primitive for a %f / %F substitution in plain decimal
    // notation. The number of post-decimal digits considered is the field's
    // `second_field` (6 when absent) plus one. Always returns `Some`.
    fn get_primitive(
        &self,
        field: &FormatField,
        initial_prefix: &InitialPrefix,
        str_in: &str,
    ) -> Option<FormatPrimitive> {
        let last_dec_place = (field.second_field.unwrap_or(6) + 1) as usize;
        let analysis =
            FloatAnalysis::analyze(str_in, initial_prefix, None, Some(last_dec_place), false);
        let unprefixed = &str_in[initial_prefix.offset..];
        Some(get_primitive_dec(
            initial_prefix,
            unprefixed,
            &analysis,
            last_dec_place,
            None,
        ))
    }

    // Renders the primitive with the shared float renderer.
    fn primitive_to_str(&self, prim: &FormatPrimitive, field: FormatField) -> String {
        primitive_to_str_common(prim, &field)
    }
}

View file

@ -1,282 +0,0 @@
// spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety
// spell-checker:ignore (ToDO) arrnum
//! formatter for unsigned and signed int subs
//! unsigned int: %X %x (hex u64) %o (octal u64) %u (base ten u64)
//! signed int: %i %d (both base ten i64)
use super::super::format_field::FormatField;
use super::super::formatter::{
get_it_at, warn_incomplete_conv, Base, FormatPrimitive, Formatter, InitialPrefix,
};
use std::i64;
use std::u64;
// Formatter for the integer substitutions (%d %i %u %o %x %X).
#[derive(Default)]
pub struct Intf {
// placeholder field; nothing in this file reads it
_a: u32,
}
// see the Intf::analyze() function below
// Facts gathered about the digits of an integer argument.
struct IntAnalysis {
// true if the number *may* be above the maximum and has to be parsed to find out
check_past_max: bool,
// true if the number is known to be past the maximum
past_max: bool,
// true if the only digit read was a single '0'
is_zero: bool,
// number of digit characters read
len_digits: u8,
}
impl Intf {
    // Creates the formatter.
    pub fn new() -> Self {
        Self::default()
    }

    // Takes a ref to the argument string and basic information about its
    // prefix (offset, radix, sign), and analyzes the string to gain the
    // IntAnalysis information:
    // check_past_max: true if the number *may* be above max, but we don't
    //   know either way. One of several reasons we may have to parse as int.
    // past_max: true if the number is past max, false if not.
    //   In the future we should probably combine these into an Option<bool>.
    // is_zero: true if number is zero, false otherwise
    // len_digits: number of digits used to create the int; important, for
    //   example, if we run into a non-valid character. The counter saturates
    //   at u8::MAX instead of overflowing on very long inputs.
    #[allow(clippy::cognitive_complexity)]
    fn analyze(str_in: &str, signed_out: bool, initial_prefix: &InitialPrefix) -> IntAnalysis {
        let mut str_it = get_it_at(initial_prefix.offset, str_in);
        // the maximum number of digits we could conceivably
        // have before the decimal point without exceeding the
        // max
        let max_sd_in = if signed_out {
            match initial_prefix.radix_in {
                Base::Ten => 19,
                Base::Octal => 21,
                Base::Hex => 16,
            }
        } else {
            match initial_prefix.radix_in {
                Base::Ten => 20,
                Base::Octal => 22,
                Base::Hex => 16,
            }
        };
        let mut ret = IntAnalysis {
            check_past_max: false,
            past_max: false,
            is_zero: false,
            len_digits: 0,
        };
        // running out of input needs no special handling, so simply stop
        while let Some(c) = str_it.next() {
            match c {
                '0'..='9' | 'a'..='f' | 'A'..='F' => {
                    if ret.len_digits == 0 && c == '0' {
                        ret.is_zero = true;
                    } else if ret.is_zero {
                        ret.is_zero = false;
                    }
                    // saturate: an input with more than 255 digits must not
                    // overflow the counter (any such number is past max and
                    // fails conversion anyway)
                    ret.len_digits = ret.len_digits.saturating_add(1);
                    if ret.len_digits == max_sd_in {
                        if let Some(next_ch) = str_it.next() {
                            match next_ch {
                                '0'..='9' => {
                                    ret.past_max = true;
                                }
                                _ => {
                                    // force conversion
                                    // to check if its above max.
                                    // todo: spin out convert
                                    // into fn, call it here to try
                                    // read val, on Ok()
                                    // save val for reuse later
                                    // that way on same-base in and out
                                    // we don't needlessly convert int
                                    // to str, we can just copy it over.
                                    ret.check_past_max = true;
                                    str_it.put_back(next_ch);
                                }
                            }
                            if ret.past_max {
                                break;
                            }
                        } else {
                            ret.check_past_max = true;
                        }
                    }
                }
                _ => {
                    warn_incomplete_conv(str_in);
                    break;
                }
            }
        }
        ret
    }

    // get a FormatPrimitive of the maximum value for the field char
    // and given sign (for signed output with a negative sign this is the
    // minimum, written as "-" plus its magnitude). For 'X' the hex digits are
    // capitalized, matching the in-range 'X' output.
    fn get_max(field_char: char, sign: i8) -> FormatPrimitive {
        let mut fmt_primitive = FormatPrimitive::default();
        let digits = match field_char {
            'd' | 'i' => match sign {
                1 => "9223372036854775807",
                _ => {
                    fmt_primitive.prefix = Some(String::from("-"));
                    "9223372036854775808"
                }
            },
            'x' => "ffffffffffffffff",
            'X' => "FFFFFFFFFFFFFFFF",
            'o' => "1777777777777777777777",
            /* 'u' | */ _ => "18446744073709551615",
        };
        fmt_primitive.pre_decimal = Some(String::from(digits));
        fmt_primitive
    }

    // conv_from_segment contract:
    // 1. takes
    //    - a string that begins with a non-zero digit, and proceeds
    //      with zero or more following digits until the end of the string
    //    - a radix to interpret those digits as
    //    - a char that communicates:
    //      whether to interpret+output the string as an i64 or u64
    //      what radix to write the parsed number as.
    // 2. parses it as a rust integral type
    // 3. outputs FormatPrimitive with:
    //    - if the string falls within bounds:
    //      number parsed and written in the correct radix
    //    - if the string falls outside bounds:
    //      for i64 output, the int minimum or int max (depending on sign)
    //      for u64 output, the u64 max in the output radix
    // For unsigned output a negative sign yields the two's complement of the
    // parsed value; a parsed value of zero stays zero.
    fn conv_from_segment(
        segment: &str,
        radix_in: Base,
        field_char: char,
        sign: i8,
    ) -> FormatPrimitive {
        match field_char {
            'i' | 'd' => match i64::from_str_radix(segment, radix_in as u32) {
                Ok(i) => {
                    let mut fmt_prim = FormatPrimitive::default();
                    if sign == -1 {
                        fmt_prim.prefix = Some(String::from("-"));
                    }
                    fmt_prim.pre_decimal = Some(format!("{i}"));
                    fmt_prim
                }
                Err(_) => Self::get_max(field_char, sign),
            },
            _ => match u64::from_str_radix(segment, radix_in as u32) {
                Ok(u) => {
                    let mut fmt_prim = FormatPrimitive::default();
                    // wrapping_neg equals `u64::MAX - (u - 1)` for every
                    // non-zero u and does not underflow when u is 0
                    let u_f = if sign == -1 { u.wrapping_neg() } else { u };
                    fmt_prim.pre_decimal = Some(match field_char {
                        'X' => format!("{u_f:X}"),
                        'x' => format!("{u_f:x}"),
                        'o' => format!("{u_f:o}"),
                        _ => format!("{u_f}"),
                    });
                    fmt_prim
                }
                Err(_) => Self::get_max(field_char, sign),
            },
        }
    }
}
impl Formatter for Intf {
    // Builds the primitive for an integer substitution. Always returns
    // `Some`:
    // - "0" when no digit was read or the number is zero
    // - the saturated maximum when the number is known to be out of range
    // - otherwise the digits, either converted through an integer type or
    //   copied straight from the argument
    fn get_primitive(
        &self,
        field: &FormatField,
        initial_prefix: &InitialPrefix,
        str_in: &str,
    ) -> Option<FormatPrimitive> {
        let field_char = *field.field_char;
        let begin = initial_prefix.offset;
        // get information about the string. see Intf::analyze above.
        let signed_out = field_char == 'i' || field_char == 'd';
        let convert_hints = Self::analyze(str_in, signed_out, initial_prefix);

        // a non-digit or the end was reached before any non-zero digit
        if convert_hints.len_digits == 0 || convert_hints.is_zero {
            return Some(FormatPrimitive {
                pre_decimal: Some(String::from("0")),
                ..Default::default()
            });
        }
        if convert_hints.past_max {
            return Some(Self::get_max(field_char, initial_prefix.sign));
        }

        // from here on the number is, or may be, within bounds
        let radix_out = match field_char {
            'd' | 'i' | 'u' => Base::Ten,
            'x' | 'X' => Base::Hex,
            /* 'o' | */ _ => Base::Octal,
        };
        let radix_mismatch = radix_out != initial_prefix.radix_in;
        let decrease_from_max = initial_prefix.sign == -1 && field_char != 'i';
        let end = begin + convert_hints.len_digits as usize;
        let digits = &str_in[begin..end];

        // convert to int if any one of these is true:
        // - number of digits in int indicates it may be past max
        // - we're subtracting from the max
        // - we're converting the base
        let must_convert = convert_hints.check_past_max || decrease_from_max || radix_mismatch;
        if must_convert {
            return Some(Self::conv_from_segment(
                digits,
                initial_prefix.radix_in.clone(),
                field_char,
                initial_prefix.sign,
            ));
        }

        // otherwise just do a straight string copy. The sign is handled here
        // and not earlier because zero doesn't get a sign, and
        // conv_from_segment creates its format primitive separately.
        let mut fmt_prim = FormatPrimitive::default();
        if initial_prefix.sign == -1 && field_char == 'i' {
            fmt_prim.prefix = Some(String::from("-"));
        }
        fmt_prim.pre_decimal = Some(String::from(digits));
        Some(fmt_prim)
    }

    // Renders an integer primitive: prefix, zero padding, digits.
    // For integers the second field is a zero-padded minimum width, which is
    // applied here, before any general minimum width.
    // Panics if the primitive has no pre_decimal value.
    fn primitive_to_str(&self, prim: &FormatPrimitive, field: FormatField) -> String {
        let mut rendered = String::new();
        if let Some(prefix) = prim.prefix.as_ref() {
            rendered.push_str(prefix);
        }

        let pre_decimal = match prim.pre_decimal.as_ref() {
            Some(digits) => digits,
            None => panic!(
                "error, format primitives provided to int, will, incidentally under \
                 correct behavior, always have a pre_dec value."
            ),
        };
        if let Some(min) = field.second_field {
            let len = pre_decimal.len() as u32;
            for _ in len..min {
                rendered.push('0');
            }
        }
        rendered.push_str(pre_decimal);
        rendered
    }
}

View file

@ -1,9 +0,0 @@
// spell-checker:ignore (vars) charf cninetyninehexfloatf decf floatf intf scif strf Cninety
mod base_conv;
pub mod cninetyninehexfloatf;
pub mod decf;
mod float_common;
pub mod floatf;
pub mod intf;
pub mod scif;

View file

@ -1,43 +0,0 @@
// spell-checker:ignore (vars) charf cninetyninehexfloatf decf floatf intf scif strf Cninety
//! formatter for %e %E scientific notation subs
use super::super::format_field::FormatField;
use super::super::formatter::{FormatPrimitive, Formatter, InitialPrefix};
use super::float_common::{get_primitive_dec, primitive_to_str_common, FloatAnalysis};
#[derive(Default)]
pub struct Scif;
impl Scif {
pub fn new() -> Self {
Self
}
}
impl Formatter for Scif {
fn get_primitive(
&self,
field: &FormatField,
initial_prefix: &InitialPrefix,
str_in: &str,
) -> Option<FormatPrimitive> {
let second_field = field.second_field.unwrap_or(6) + 1;
let analysis = FloatAnalysis::analyze(
str_in,
initial_prefix,
Some(second_field as usize + 1),
None,
false,
);
let f = get_primitive_dec(
initial_prefix,
&str_in[initial_prefix.offset..],
&analysis,
second_field as usize,
Some(*field.field_char == 'E'),
);
Some(f)
}
fn primitive_to_str(&self, prim: &FormatPrimitive, field: FormatField) -> String {
primitive_to_str_common(prim, &field)
}
}

View file

@ -1,4 +0,0 @@
pub mod format_field;
mod formatter;
mod formatters;
pub mod num_format;

View file

@ -1,271 +0,0 @@
// spell-checker:ignore (vars) charf cninetyninehexfloatf decf floatf intf scif strf Cninety
//! handles creating printed output for numeric substitutions
// spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety
use std::env;
use std::vec::Vec;
use crate::display::Quotable;
use crate::{show_error, show_warning};
use super::format_field::{FieldType, FormatField};
use super::formatter::{Base, FormatPrimitive, Formatter, InitialPrefix};
use super::formatters::cninetyninehexfloatf::CninetyNineHexFloatf;
use super::formatters::decf::Decf;
use super::formatters::floatf::Floatf;
use super::formatters::intf::Intf;
use super::formatters::scif::Scif;
pub fn warn_expected_numeric(pf_arg: &str) {
// important: keep println here not print
show_error!("{}: expected a numeric value", pf_arg.maybe_quote());
}
// Warns that the bytes following a character constant have been ignored.
// The warning is only issued when the POSIXLY_CORRECT environment variable
// is not present; if it is set (even to a value that is not valid Unicode)
// nothing is printed.
fn warn_char_constant_ign(remaining_bytes: &[u8]) {
    if let Err(env::VarError::NotPresent) = env::var("POSIXLY_CORRECT") {
        show_warning!(
            "{:?}: character(s) following character \
             constant have been ignored",
            remaining_bytes
        );
    }
}
// Looks at the first few bytes of an argument and returns a value if one can
// be learned from them alone:
// - no argument, or an empty argument: Some(0)
// - a character constant (leading ' or "): the byte after the quote; any
//   further bytes are ignored with a warning. A quote with nothing after it
//   is reported as non-numeric and yields Some(0).
// - anything else: None, meaning the argument must be parsed as a number
fn get_provided(str_in_opt: Option<&String>) -> Option<u8> {
    const C_S_QUOTE: u8 = 39;
    const C_D_QUOTE: u8 = 34;

    let str_in = match str_in_opt {
        Some(str_in) => str_in,
        None => return Some(0),
    };
    let mut byte_it = str_in.bytes();
    let ch = match byte_it.next() {
        Some(ch) => ch,
        // no first byte
        None => return Some(0_u8),
    };
    // first byte is not quote
    if ch != C_S_QUOTE && ch != C_D_QUOTE {
        return None;
    }

    match byte_it.next() {
        Some(second_byte) => {
            let ignored: Vec<u8> = byte_it.collect();
            if !ignored.is_empty() {
                warn_char_constant_ign(&ignored);
            }
            Some(second_byte)
        }
        // no byte after quote
        None => {
            let so_far = (ch as char).to_string();
            warn_expected_numeric(&so_far);
            Some(0_u8)
        }
    }
}
// takes a string and returns
// a sign,
// a base,
// and an offset for index after all
// initial spacing, sign, base prefix, and leading zeroes
// The base defaults to ten. A "0x"/"0X" prefix selects hex for every field
// type; a leading zero followed by another digit selects octal, but only
// when `field_type` is FieldType::Intf.
#[allow(clippy::cognitive_complexity)]
fn get_initial_prefix(str_in: &str, field_type: &FieldType) -> InitialPrefix {
let mut str_it = str_in.chars();
let mut ret = InitialPrefix {
radix_in: Base::Ten,
sign: 1,
offset: 0,
};
let mut top_char = str_it.next();
// skip spaces and ensure top_char is the first non-space char
// (or None if none exists)
while let Some(' ') = top_char {
ret.offset += 1;
top_char = str_it.next();
}
// parse sign
match top_char {
Some('+') => {
ret.offset += 1;
top_char = str_it.next();
}
Some('-') => {
ret.sign = -1;
ret.offset += 1;
top_char = str_it.next();
}
_ => {}
}
// we want to exit with offset being
// the index of the first non-zero
// digit before the decimal point or
// if there is none, the zero before the
// decimal point, or, if there is none,
// the decimal point.
// while we are determining the offset
// we will ensure as a convention
// the offset is always on the first character
// that we are yet unsure if it is the
// final offset. If the zero could be before
// a decimal point we don't move past the zero.
let mut is_hex = false;
if Some('0') == top_char {
if let Some(base) = str_it.next() {
// lead zeroes can only exist in
// octal and hex base
let mut do_clean_lead_zeroes = false;
match base {
'x' | 'X' => {
is_hex = true;
// step over both characters of the "0x" prefix
ret.offset += 2;
ret.radix_in = Base::Hex;
do_clean_lead_zeroes = true;
}
e @ '0'..='9' => {
// step over the leading zero only; `e` may be the final offset
ret.offset += 1;
if let FieldType::Intf = *field_type {
ret.radix_in = Base::Octal;
}
if e == '0' {
do_clean_lead_zeroes = true;
}
}
_ => {}
}
if do_clean_lead_zeroes {
let mut first = true;
for ch_zero in str_it {
// see notes on offset above:
// this is why the offset for octal and decimal numbers
// that reach this branch is 1 even though
// they have already eaten the characters '00'
// this is also why when hex encounters its
// first zero it does not move its offset
// forward because it does not know for sure
// that it's current offset (of that zero)
// is not the final offset,
// whereas at that point octal knows its
// current offset is not the final offset.
match ch_zero {
'0' => {
if !(is_hex && first) {
ret.offset += 1;
}
}
// if decimal, keep last zero if one exists
// (it's possible for last zero to
// not exist at this branch if we're in hex input)
'.' => break,
// other digit, etc.
_ => {
if !(is_hex && first) {
ret.offset += 1;
}
break;
}
}
if first {
first = false;
}
}
}
}
}
ret
}
// this is the function a Sub's print will delegate to
// if it is a numeric field, passing the field details
// and an iterator to the argument
// Returns the rendered text for the substitution, or None when the selected
// Formatter produced no primitive.
// Panics if `field.field_type` is not one of the numeric field types.
pub fn num_format(field: &FormatField, in_str_opt: Option<&String>) -> Option<String> {
let field_char = field.field_char;
// num format mainly operates by further delegating to one of
// several Formatter structs depending on the field
// see formatter.rs for more details
// to do switch to static dispatch
let formatter: Box<dyn Formatter> = match *field.field_type {
FieldType::Intf => Box::new(Intf::new()),
FieldType::Floatf => Box::new(Floatf::new()),
FieldType::CninetyNineHexFloatf => Box::new(CninetyNineHexFloatf::new()),
FieldType::Scif => Box::new(Scif::new()),
FieldType::Decf => Box::new(Decf::new()),
_ => {
panic!("asked to do num format with non-num field type");
}
};
let prim_opt=
// if we can get an assumed value from looking at the first
// few characters, use that value to create the FormatPrimitive
if let Some(provided_num) = get_provided(in_str_opt) {
let mut tmp = FormatPrimitive::default();
match field_char {
// integer fields: write the value directly in the output radix
'u' | 'i' | 'd' => {
tmp.pre_decimal = Some(
format!("{provided_num}"));
},
'x' | 'X' => {
tmp.pre_decimal = Some(
format!("{provided_num:x}"));
},
'o' => {
tmp.pre_decimal = Some(
format!("{provided_num:o}"));
},
// scientific and %g fields: run the value through the formatter
// as if it had been given as a decimal string
'e' | 'E' | 'g' | 'G' => {
let as_str = format!("{provided_num}");
let initial_prefix = get_initial_prefix(
&as_str,
field.field_type
);
tmp=formatter.get_primitive(field, &initial_prefix, &as_str)
.expect("err during default provided num");
},
// remaining field chars: the value with a single post-decimal zero
_ => {
tmp.pre_decimal = Some(
format!("{provided_num}"));
tmp.post_decimal = Some(String::from("0"));
}
}
Some(tmp)
} else {
// otherwise we'll interpret the argument as a number
// using the appropriate Formatter
let in_str = in_str_opt.expect(
"please send the devs this message:
\n get_provided is failing to ret as Some(0) on no str ");
// first get information about the beginning of the
// numeric argument that would be useful for
// any formatter (int or float)
let initial_prefix = get_initial_prefix(
in_str,
field.field_type
);
// then get the FormatPrimitive from the Formatter
formatter.get_primitive(field, &initial_prefix, in_str)
};
// if we have a formatPrimitive, print its results
// according to the field-char appropriate Formatter
prim_opt.map(|prim| formatter.primitive_to_str(&prim, field.clone()))
}

View file

@ -1,452 +0,0 @@
// spell-checker:ignore (vars) charf decf floatf intf scif strf Cninety
//! Sub is a token that represents a
//! segment of the format string that is a substitution
//! it is created by Sub's implementation of the Tokenizer trait
//! Subs which have numeric field chars make use of the num_format
//! submodule
use crate::error::{UError, UResult};
use itertools::{put_back_n, PutBackN};
use std::error::Error;
use std::fmt::Display;
use std::io::Write;
use std::iter::Peekable;
use std::process::exit;
use std::slice::Iter;
use std::str::Chars;
// use std::collections::HashSet;
use super::num_format::format_field::{FieldType, FormatField};
use super::num_format::num_format;
use super::token;
use super::unescaped_text::UnescapedText;
const EXIT_ERR: i32 = 1;
/// Error raised while parsing a `%` substitution of a format string.
#[derive(Debug)]
pub enum SubError {
    /// The conversion specification is invalid. Holds the specification
    /// text to report; `Display` prefixes it with `%`.
    InvalidSpec(String),
}

impl Display for SubError {
    /// Renders the error as `%<spec>: invalid conversion specification`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self::InvalidSpec(spec) = self;
        f.write_fmt(format_args!("%{spec}: invalid conversion specification"))
    }
}
// Both impls are empty, so `SubError` uses the default method bodies of
// `Error` and `UError`.
impl Error for SubError {}
impl UError for SubError {}
/// Parses the argument supplied for a `*` width or precision into an integer.
///
/// The argument is rendered through `num_format` as an `%i` field and the
/// resulting text is parsed back. This is a costly round trip; going through
/// the integer pipeline and returning the number directly is on the
/// refactoring TODO.
///
/// # Panics
///
/// Panics if `num_format` returns `None` or if its output does not parse as
/// an `isize`.
fn convert_asterisk_arg_int(asterisk_arg: &str) -> isize {
    let arg_text = asterisk_arg.to_string();
    let int_type = FieldType::Intf;
    let int_char = 'i';
    let int_field = FormatField {
        min_width: Some(0),
        second_field: Some(0),
        orig: &arg_text,
        field_type: &int_type,
        field_char: &int_char,
    };
    let rendered = num_format::num_format(&int_field, Some(&arg_text)).unwrap();
    rendered.parse::<isize>().unwrap()
}
/// A format field value that is either fixed in the format string or
/// deferred to an argument.
pub enum CanAsterisk<T> {
/// The value as parsed from the format string.
Fixed(T),
/// The field was written as `*`; `Sub::write` takes the value from the
/// next argument.
Asterisk,
}
/// Sub is a tokenizer which creates tokens
/// for substitution segments of a format string
pub struct Sub {
/// Minimum output width; `Sub::write` pads before the value when it is
/// positive and after the value when it is negative.
min_width: CanAsterisk<Option<isize>>,
/// The field after the `.`; for `%s` it is used as the maximum length.
second_field: CanAsterisk<Option<u32>>,
/// The conversion character, e.g. `d` or `s`.
field_char: char,
/// Field category derived from `field_char` in `Sub::new`.
field_type: FieldType,
/// Specification text handed over by the parser; passed on as
/// `FormatField::orig`.
orig: String,
/// Character used by `Sub::write` to pad up to `min_width`.
prefix_char: char,
}
impl Sub {
    /// Builds a substitution token, deriving its `FieldType` from
    /// `field_char`.
    ///
    /// Field characters are grouped here so that printing can dispatch on
    /// the group. An unknown field character prints `Invalid field type` and
    /// exits the process with `EXIT_ERR`.
    pub fn new(
        min_width: CanAsterisk<Option<isize>>,
        second_field: CanAsterisk<Option<u32>>,
        field_char: char,
        orig: String,
        prefix_char: char,
    ) -> Self {
        let field_type = match field_char {
            'c' => FieldType::Charf,
            's' | 'b' => FieldType::Strf,
            'd' | 'i' | 'u' | 'o' | 'x' | 'X' => FieldType::Intf,
            'e' | 'E' => FieldType::Scif,
            'f' | 'F' => FieldType::Floatf,
            'g' | 'G' => FieldType::Decf,
            'a' | 'A' => FieldType::CninetyNineHexFloatf,
            _ => {
                // should be unreachable.
                println!("Invalid field type");
                exit(EXIT_ERR);
            }
        };
        Self {
            min_width,
            second_field,
            field_char,
            field_type,
            orig,
            prefix_char,
        }
    }
}
/// Accumulates the pieces of one `%` substitution while it is being parsed.
#[derive(Default)]
pub(crate) struct SubParser {
/// Characters collected for the minimum width (may start with `-` or be `*`).
min_width_tmp: Option<String>,
/// Set when the minimum width was written as `*`.
min_width_is_asterisk: bool,
/// Set once a `.` or a length specifier has been seen.
past_decimal: bool,
/// Characters collected for the field after the `.` (digits or `*`).
second_field_tmp: Option<String>,
/// Set when the second field was written as `*`.
second_field_is_asterisk: bool,
/// Set once a length specifier (`h`, `j`, `l`, `L`, `t`, `z`) has been seen.
specifiers_found: bool,
/// The conversion character, once found.
field_char: Option<char>,
/// Specification text consumed so far; used in error messages and as the
/// token's `orig`.
text_so_far: String,
}
impl SubParser {
fn new() -> Self {
Self::default()
}
/// Tries to parse a substitution at the current position of `it`.
///
/// When a substitution is found, the token is built, written to `writer`
/// (consuming arguments from `args` as needed) and returned as `Some`.
/// Returns `Ok(None)` when `it` is not positioned at a substitution, and
/// an error when the specification is invalid.
pub(crate) fn from_it<W>(
    writer: &mut W,
    it: &mut PutBackN<Chars>,
    args: &mut Peekable<Iter<String>>,
) -> UResult<Option<token::Token>>
where
    W: Write,
{
    let mut parser = Self::new();
    if !parser.sub_vals_retrieved(it)? {
        return Ok(None);
    }
    let token = Self::build_token(parser);
    token.write(writer, args);
    Ok(Some(token))
}
fn build_token(parser: Self) -> token::Token {
// not a self method so as to allow move of sub-parser vals.
// return new Sub struct as token
let prefix_char = match &parser.min_width_tmp {
Some(width) if width.starts_with('0') => '0',
_ => ' ',
};
token::Token::Sub(Sub::new(
if parser.min_width_is_asterisk {
CanAsterisk::Asterisk
} else {
CanAsterisk::Fixed(
parser
.min_width_tmp
.map(|x| x.parse::<isize>().unwrap_or(1)),
)
},
if parser.second_field_is_asterisk {
CanAsterisk::Asterisk
} else {
CanAsterisk::Fixed(parser.second_field_tmp.map(|x| x.parse::<u32>().unwrap()))
},
parser.field_char.unwrap(),
parser.text_so_far,
prefix_char,
))
}
/// Parses one substitution of the form `%([0-9]+)?(.[0-9]+)?([a-zA-Z])`
/// into minimum width, second field and field character.
///
/// Returns `Ok(false)` when `it` is not positioned at a substitution
/// (nothing is consumed in that case), `Ok(true)` when a substitution was
/// parsed into `self`, and `SubError::InvalidSpec` carrying the text read
/// so far when the specification is malformed.
///
/// No regex is used on purpose: the error must report the field as
/// interpreted up to the point of failure.
#[allow(clippy::cognitive_complexity)]
fn sub_vals_retrieved(&mut self, it: &mut PutBackN<Chars>) -> UResult<bool> {
    // 'a' and 'A' are absent because the c99 hex float implementation is
    // not yet complete.
    const LEGAL_FIELDS: &[char] = &[
        'b', 'c', 'd', 'e', 'E', 'f', 'F', 'g', 'G', 'i', 'o', 's', 'u', 'x', 'X',
    ];
    const SPECIFIERS: &[char] = &['h', 'j', 'l', 'L', 't', 'z'];

    if !Self::successfully_eat_prefix(it, &mut self.text_so_far)? {
        return Ok(false);
    }

    for ch in it {
        // Every consumed character is recorded exactly once, so that
        // error messages and the token's `orig` mirror the input.
        self.text_so_far.push(ch);
        match ch {
            '-' | '*' | '0'..='9' => {
                if self.past_decimal {
                    // The second field can never be negative, cannot follow
                    // an asterisk and cannot follow a length specifier.
                    if self.second_field_is_asterisk || ch == '-' || self.specifiers_found {
                        return Err(SubError::InvalidSpec(self.text_so_far.clone()).into());
                    }
                    let digits = self.second_field_tmp.get_or_insert_with(String::new);
                    if ch == '*' {
                        // `*` is only valid as the whole field.
                        if !digits.is_empty() {
                            return Err(SubError::InvalidSpec(self.text_so_far.clone()).into());
                        }
                        self.second_field_is_asterisk = true;
                    }
                    digits.push(ch);
                } else {
                    if self.min_width_is_asterisk || self.specifiers_found {
                        return Err(SubError::InvalidSpec(self.text_so_far.clone()).into());
                    }
                    let width = self.min_width_tmp.get_or_insert_with(String::new);
                    // `-` and `*` are only valid as the first character.
                    if (ch == '-' || ch == '*') && !width.is_empty() {
                        return Err(SubError::InvalidSpec(self.text_so_far.clone()).into());
                    }
                    if ch == '*' {
                        self.min_width_is_asterisk = true;
                    }
                    width.push(ch);
                }
            }
            '.' => {
                if self.past_decimal {
                    return Err(SubError::InvalidSpec(self.text_so_far.clone()).into());
                }
                self.past_decimal = true;
            }
            c if LEGAL_FIELDS.contains(&c) => {
                self.field_char = Some(c);
                break;
            }
            c if SPECIFIERS.contains(&c) => {
                // A length specifier ends the width part of the field.
                self.past_decimal = true;
                self.specifiers_found = true;
            }
            _ => {
                return Err(SubError::InvalidSpec(self.text_so_far.clone()).into());
            }
        }
    }

    let field_char = match self.field_char {
        Some(c) => c,
        None => return Err(SubError::InvalidSpec(self.text_so_far.clone()).into()),
    };

    // If the dot is provided without a second field, printf interprets
    // the second field as 0.
    if self.past_decimal && self.second_field_tmp.is_none() {
        self.second_field_tmp = Some(String::from("0"));
    }

    self.validate_field_params(field_char)?;
    Ok(true)
}
/// Checks whether `it` is positioned at the start of a substitution.
///
/// Looks at the next two characters. A `%` followed by anything other
/// than `%` is a substitution: the `%` is consumed, the follower is put
/// back and `Ok(true)` is returned. A `%` at the very end of the input is
/// an invalid specification. In every other case (including `%%`) both
/// characters are put back and `Ok(false)` is returned.
fn successfully_eat_prefix(
    it: &mut PutBackN<Chars>,
    text_so_far: &mut String,
) -> UResult<bool> {
    let first = it.next();
    let second = it.next();
    match (first, second) {
        (Some('%'), None) => {
            text_so_far.push('%');
            Err(SubError::InvalidSpec(text_so_far.clone()).into())
        }
        (Some('%'), Some(follower)) if follower != '%' => {
            it.put_back(follower);
            Ok(true)
        }
        (first, second) => {
            // Restore the input in reverse order of consumption.
            if let Some(c) = second {
                it.put_back(c);
            }
            if let Some(c) = first {
                it.put_back(c);
            }
            Ok(false)
        }
    }
}
/// Rejects illegal combinations of field character and parameters.
///
/// Checked once at parse time rather than on each application:
/// `%s` may not have a width of exactly `0`; `%c` may have neither a
/// width of exactly `0` nor anything after the width; `%b` accepts no
/// width and no second field at all.
// to do: move these checks to Sub::new
fn validate_field_params(&self, field_char: char) -> UResult<()> {
    let zero_width = self.min_width_tmp.as_deref() == Some("0");
    let invalid = match field_char {
        's' => zero_width,
        'c' => zero_width || self.past_decimal,
        'b' => {
            self.min_width_tmp.is_some()
                || self.past_decimal
                || self.second_field_tmp.is_some()
        }
        _ => false,
    };
    if invalid {
        // to do: include information about an invalid
        // string substitution
        return Err(SubError::InvalidSpec(self.text_so_far.clone()).into());
    }
    Ok(())
}
}
impl Sub {
    /// Writes this substitution to `writer`, consuming its arguments from
    /// `pf_args_it`.
    ///
    /// Arguments are consumed in order: one for a `*` width, one for a `*`
    /// second field, then one for the value itself. A missing `*` argument
    /// counts as `0`; a negative `*` second field counts as absent.
    ///
    /// `%s` truncates the argument to at most `second_field` bytes. The cut
    /// never exceeds the argument's length and is moved back to the nearest
    /// character boundary, so it cannot panic. `%b` output is written
    /// directly by `UnescapedText::from_it_core`. `%c` yields the first
    /// character. All other fields are delegated to `num_format`.
    ///
    /// The result is padded with `prefix_char` up to the absolute value of
    /// the minimum width: before the value for a positive width, after it
    /// for a negative one. Write errors are ignored.
    #[allow(clippy::cognitive_complexity)]
    pub(crate) fn write<W>(&self, writer: &mut W, pf_args_it: &mut Peekable<Iter<String>>)
    where
        W: Write,
    {
        let field = FormatField {
            min_width: match self.min_width {
                CanAsterisk::Fixed(x) => x,
                CanAsterisk::Asterisk => {
                    match pf_args_it.next() {
                        // temporary, use intf.rs instead
                        Some(x) => Some(convert_asterisk_arg_int(x)),
                        None => Some(0),
                    }
                }
            },
            second_field: match self.second_field {
                CanAsterisk::Fixed(x) => x,
                CanAsterisk::Asterisk => {
                    match pf_args_it.next() {
                        // temporary, use intf.rs instead
                        Some(x) => {
                            let result = convert_asterisk_arg_int(x);
                            if result < 0 {
                                None
                            } else {
                                Some(result as u32)
                            }
                        }
                        None => Some(0),
                    }
                }
            },
            field_char: &self.field_char,
            field_type: &self.field_type,
            orig: &self.orig,
        };
        let pf_arg = pf_args_it.next();

        // Minimum width is handled independently of the actual field char.
        let pre_min_width_opt: Option<String> = match *field.field_type {
            FieldType::Strf | FieldType::Charf => match pf_arg {
                Some(arg_string) => match *field.field_char {
                    's' => Some(match field.second_field {
                        Some(max) => {
                            // Clamp to the argument length and back off to a
                            // character boundary; slicing anywhere else
                            // would panic.
                            let mut end = arg_string.len().min(max as usize);
                            while !arg_string.is_char_boundary(end) {
                                end -= 1;
                            }
                            String::from(&arg_string[..end])
                        }
                        None => arg_string.clone(),
                    }),
                    'b' => {
                        // The unescaped text is written while it is parsed.
                        let mut a_it = put_back_n(arg_string.chars());
                        UnescapedText::from_it_core(writer, &mut a_it, true);
                        None
                    }
                    // 'c': first character of the argument, if any.
                    _ => arg_string.chars().next().map(|x| x.to_string()),
                },
                None => None,
            },
            _ => {
                // non string/char fields are delegated to num_format
                num_format::num_format(&field, pf_arg)
            }
        };

        if let Some(pre_min_width) = pre_min_width_opt {
            let padded = match field.min_width {
                Some(min_width) => {
                    // `unsigned_abs` cannot overflow, unlike `abs`.
                    let pad_len = min_width
                        .unsigned_abs()
                        .saturating_sub(pre_min_width.len());
                    if pad_len > 0 {
                        let padding: String =
                            std::iter::repeat(self.prefix_char).take(pad_len).collect();
                        if min_width > 0 {
                            padding + &pre_min_width
                        } else {
                            pre_min_width + &padding
                        }
                    } else {
                        pre_min_width
                    }
                }
                None => pre_min_width,
            };
            write!(writer, "{padded}").ok();
        }
    }
}

View file

@ -1,39 +0,0 @@
//! Traits and enums dealing with Tokenization of printf Format String
use std::io::Write;
use std::iter::Peekable;
use std::slice::Iter;
use crate::features::tokenize::sub::Sub;
use crate::features::tokenize::unescaped_text::UnescapedText;
/// A token object is an object that can print the expected output
/// of a contiguous segment of the format string, and
/// requires at most 1 argument
pub enum Token {
/// A `%` substitution segment.
Sub(Sub),
/// Literal text whose escape sequences have been resolved.
UnescapedText(UnescapedText),
}
impl Token {
    /// Writes the token to `writer`.
    ///
    /// `args` is only passed on to substitution tokens; unescaped text
    /// takes no argument.
    pub(crate) fn write<W>(&self, writer: &mut W, args: &mut Peekable<Iter<String>>)
    where
        W: Write,
    {
        match *self {
            Self::Sub(ref sub) => sub.write(writer, args),
            Self::UnescapedText(ref text) => text.write(writer),
        }
    }
}
// A tokenizer takes an iterator positioned somewhere in a format string
// and checks whether it can produce a token of the type it knows about.
// If so, it returns the token, advances the iterator past the
// format-string text the token represents, and, if an argument is used,
// advances the argument iterator by one.
// Creating a token for a format-string segment also prints that token's
// value. Essentially, tokenizing a whole format string prints the format
// string and consumes a number of arguments equal to the number of
// argument-using tokens.

View file

@ -1,279 +0,0 @@
//! UnescapedText is a tokenizer impl
//! for tokenizing character literals,
//! and escaped character literals (of allowed escapes),
//! into an unescaped text byte array
// spell-checker:ignore (ToDO) retval hexchars octals printf's bvec vals coreutil addchar eval bytecode bslice
use itertools::PutBackN;
use std::char::from_u32;
use std::io::Write;
use std::process::exit;
use std::str::Chars;
use super::token;
const EXIT_OK: i32 = 0;
const EXIT_ERR: i32 = 1;
// By default stdout only flushes to the console when a newline is passed,
// so this macro writes the `Display` form of its argument to `$writer` and
// flushes right away. Errors from either step are ignored.
macro_rules! write_and_flush {
    ($writer:expr, $($args:tt)+) => {{
        $writer.write_fmt(format_args!("{}", $($args)+)).ok();
        $writer.flush().ok();
    }};
}
/// Writes `bslice` to `writer` and flushes it, ignoring any error from
/// either step.
fn flush_bytes<W: Write>(writer: &mut W, bslice: &[u8]) {
    let _ = writer.write_all(bslice);
    let _ = writer.flush();
}
/// Bytes of text after its escape sequences have been resolved.
#[derive(Default)]
pub struct UnescapedText(Vec<u8>);
impl UnescapedText {
fn new() -> Self {
Self::default()
}
/// Reads a base-`base` number of at most `max_chars` digits from `it`.
///
/// Consumption stops at the end of the input or at the first character
/// that is not a digit in `base`; that character is put back. If fewer
/// than `min_chars` digits were read, `missing hexadecimal number in
/// escape` is printed and the process exits with `EXIT_ERR`.
fn base_to_u32(min_chars: u8, max_chars: u8, base: u32, it: &mut PutBackN<Chars>) -> u32 {
    let mut value: u32 = 0;
    let mut consumed: u8 = 0;
    while consumed < max_chars {
        let candidate = match it.next() {
            Some(c) => c,
            // end of input
            None => break,
        };
        if let Some(digit) = candidate.to_digit(base) {
            consumed += 1;
            value *= base;
            value += digit;
        } else {
            // end of the digit run
            it.put_back(candidate);
            break;
        }
    }
    if consumed < min_chars {
        // only ever expected for hex
        println!("missing hexadecimal number in escape"); //todo stderr
        exit(EXIT_ERR);
    }
    value
}
/// Validates a code point given as a universal character name (`\u`,
/// `\U`) against the valid IEC 10646 values.
///
/// The accepted values are pinned against the more popular printf so as
/// to not disrupt when dropped in as a replacement: everything up to and
/// including 0x9f is rejected except `$` (0x24), `@` (0x40) and `` ` ``
/// (0x60), and so is the whole surrogate range 0xd800 through 0xdfff
/// inclusive.
///
/// On rejection an error naming the escape is printed (with `U` and 8 hex
/// digits when `eight_word` is set, `u` and 4 digits otherwise) and the
/// process exits with `EXIT_ERR`.
fn validate_iec(val: u32, eight_word: bool) {
    let forbidden_low = val <= 0x9f && !matches!(val, 0x24 | 0x40 | 0x60);
    let surrogate = (0xd800..=0xdfff).contains(&val);
    if forbidden_low || surrogate {
        // The message is only built on the failure path.
        let (preface, leading_zeros): (char, usize) =
            if eight_word { ('U', 8) } else { ('u', 4) };
        println!("invalid universal character name {preface}{val:0leading_zeros$x}"); //todo stderr
        exit(EXIT_ERR);
    }
}
/// Processes the escape sequence that follows a `\`.
///
/// `it` must be positioned right after the backslash. The unescaped
/// bytes are written to `writer` and appended to `byte_vec`.
///
/// * `\xH[H]` and octal escapes produce a single byte (value modulo 256).
/// * `\u` / `\U` take exactly 4 / 8 hex digits, are validated with
///   `validate_iec` and produce that character, or `-` if it is not a
///   valid `char`.
/// * `\c` exits the process with `EXIT_OK`.
/// * Any other unrecognized escape, including `\8` and `\9`, is emitted
///   literally with its backslash.
/// * A backslash at the end of the input is emitted as a backslash.
///
/// `subs_mode` (set for `%b` arguments) changes the octal behavior: a
/// leading `0` is skipped instead of being counted as a digit.
fn handle_escaped<W>(
    writer: &mut W,
    byte_vec: &mut Vec<u8>,
    it: &mut PutBackN<Chars>,
    subs_mode: bool,
) where
    W: Write,
{
    let ch = it.next().unwrap_or('\\');
    match ch {
        'x' | '0'..='7' => {
            let (max_len, base) = if ch == 'x' {
                (2, 16)
            } else {
                // In practice, gnu coreutils printf interprets octals
                // without a leading zero in %b, but it only skips leading
                // zeros in %b mode. Matching its docs instead of its
                // behavior would mean ignoring such escapes in %b mode.
                if !subs_mode || ch != '0' {
                    it.put_back(ch);
                }
                (3, 8)
            };
            let val = (Self::base_to_u32(1, max_len, base, it) % 256) as u8;
            byte_vec.push(val);
            flush_bytes(writer, &[val]);
        }
        e => {
            // Byte encoding is only specified for hex and octal; the rest
            // goes through a string to leave the door open for other
            // encodings unless that turns out to be a bottleneck.
            let mut s = String::new();
            let unescaped = match e {
                '\\' => '\\',
                '"' => '"',
                'n' => '\n',
                'r' => '\r',
                't' => '\t',
                // bell
                'a' => '\x07',
                // backspace
                'b' => '\x08',
                // vertical tab
                'v' => '\x0B',
                // form feed
                'f' => '\x0C',
                // escape character
                'e' => '\x1B',
                'c' => exit(EXIT_OK),
                'u' | 'U' => {
                    let eight_word = e == 'U';
                    let len = if eight_word { 8 } else { 4 };
                    let val = Self::base_to_u32(len, len, 16, it);
                    // Report errors with the escape that was actually used.
                    Self::validate_iec(val, eight_word);
                    from_u32(val).unwrap_or('-')
                }
                _ => {
                    // Unknown escape: keep the backslash.
                    s.push('\\');
                    e
                }
            };
            s.push(unescaped);
            write_and_flush!(writer, &s);
            byte_vec.extend(s.bytes());
        }
    };
}
/// Unescapes text from `it`, writing it to `writer` as it goes, and
/// returns the collected bytes wrapped in a token.
///
/// Outside of `subs_mode`, parsing stops in front of a substitution
/// (a `%` not followed by another `%`); the characters are put back.
/// `%%` yields a single `%`. In `subs_mode` (used for `%b` arguments)
/// `%` is ordinary text.
///
/// Returns `None` only when `it` was already exhausted.
#[allow(clippy::cognitive_complexity)]
pub fn from_it_core<W>(
    writer: &mut W,
    it: &mut PutBackN<Chars>,
    subs_mode: bool,
) -> Option<token::Token>
where
    W: Write,
{
    let mut saw_any_char = false;
    let mut unescaped = Self::new();
    // Plain characters are gathered here and moved into the byte buffer
    // before each escape, because an escape may produce bytes that are
    // not valid utf-8 and a `String` only supports valid utf-8.
    let mut pending = String::new();

    while let Some(ch) = it.next() {
        saw_any_char = true;

        if ch == '\\' {
            if !pending.is_empty() {
                unescaped.0.extend(pending.bytes());
                pending = String::new();
            }
            Self::handle_escaped(writer, &mut unescaped.0, it, subs_mode);
            continue;
        }

        if ch == '%' && !subs_mode {
            match it.next() {
                // `%%`: fall through and emit one literal `%`.
                Some('%') => {}
                // Start of a substitution: restore the input and stop.
                Some(follow) => {
                    it.put_back(follow);
                    it.put_back(ch);
                    break;
                }
                None => {
                    it.put_back(ch);
                    break;
                }
            }
        }

        // This fn can be called many times in a single exec through %b,
        // so every literal character is written out immediately.
        write_and_flush!(writer, ch);
        pending.push(ch);
    }

    if !pending.is_empty() {
        unescaped.0.extend(pending.bytes());
    }

    if saw_any_char {
        Some(token::Token::UnescapedText(unescaped))
    } else {
        None
    }
}
}
impl UnescapedText {
    /// Writes the stored bytes to `writer` and flushes it.
    pub(crate) fn write<W: Write>(&self, writer: &mut W) {
        flush_bytes(writer, self.0.as_slice());
    }
}

View file

@ -44,8 +44,8 @@ pub use crate::features::fs;
pub use crate::features::fsext;
#[cfg(feature = "lines")]
pub use crate::features::lines;
#[cfg(feature = "memo")]
pub use crate::features::memo;
#[cfg(feature = "format")]
pub use crate::features::format;
#[cfg(feature = "ringbuffer")]
pub use crate::features::ringbuffer;
#[cfg(feature = "sum")]