Remove Clone bound on parse_dec_float()

It's not necessary to clone the character iterator at all.
Also move rarely used inf/nan parsing to own cold function.
This commit is contained in:
Mahmoud Al-Qudsi 2024-06-19 18:43:53 -05:00
parent 373cef08cc
commit 28a3ae7a8b

View file

@ -1,47 +1,36 @@
use super::errors::Error; use super::errors::Error;
use super::hex_float; use super::hex_float;
use crate::wchar::IntoCharIter; use crate::wchar::IntoCharIter;
use std::num::ParseFloatError;
// Parse a decimal float from a sequence of characters. // Parse a decimal float from a sequence of characters.
// Return the parsed float, and (on success) the number of characters consumed. // Return the parsed float, and (on success) the number of characters consumed.
fn parse_dec_float<I>( fn parse_dec_float<I>(chars: I, decimal_sep: char, consumed: &mut usize) -> Option<f64>
mut chars: I,
decimal_sep: char,
consumed: &mut usize,
) -> Result<f64, ParseFloatError>
where where
I: Iterator<Item = char> + Clone, I: Iterator<Item = char>,
{ {
// This uses Rust's native float parsing and a temporary string. // This uses Rust's native float parsing and a temporary string.
// The EBNF grammar is at https://doc.rust-lang.org/std/primitive.f64.html#method.from_str // The EBNF grammar is at https://doc.rust-lang.org/std/primitive.f64.html#method.from_str
// Note it is case-insensitive and we replace the decimal separator with a period. // Note it is case-insensitive and we replace the decimal separator with a period.
let mut s = String::new(); let mut s = String::new();
if matches!(chars.clone().next(), Some('+' | '-')) { let mut chars = chars.peekable();
s.push(chars.next().unwrap()); if let Some(sign) = chars.next_if(|c| ['-', '+'].contains(c)) {
s.push(sign);
} }
for spec in ["infinity", "inf", "nan"] { if chars
if chars .peek()
.clone() .map(|c| c.is_ascii_alphabetic())
.take(spec.len()) .unwrap_or(false)
.map(|c| c.to_ascii_lowercase()) {
.eq(spec.chars()) return parse_inf_nan(chars, s.as_bytes().get(0).copied(), consumed);
{
s.push_str(spec);
let res = s.parse::<f64>()?;
*consumed = s.len();
return Ok(res);
}
} }
while chars.clone().next().map_or(false, |c| c.is_ascii_digit()) { while let Some(c) = chars.next_if(|c| c.is_ascii_digit()) {
s.push(chars.next().unwrap()); s.push(c);
} }
if chars.clone().next() == Some(decimal_sep) { if chars.next_if(|c| *c == decimal_sep).is_some() {
chars.next();
s.push('.'); // Replace decimal separator with a period. s.push('.'); // Replace decimal separator with a period.
while chars.clone().next().map_or(false, |c| c.is_ascii_digit()) { while let Some(c) = chars.next_if(|c| c.is_ascii_digit()) {
s.push(chars.next().unwrap()); s.push(c);
} }
} }
@ -50,15 +39,15 @@ where
// one digit after the decimal separator. Keep track of how many we have, // one digit after the decimal separator. Keep track of how many we have,
// and the length before. // and the length before.
let len_before_exp = s.len(); let len_before_exp = s.len();
if matches!(chars.clone().next(), Some('E' | 'e')) { if let Some(e) = chars.next_if(|c| ['E', 'e'].contains(c)) {
s.push(chars.next().unwrap()); s.push(e);
if matches!(chars.clone().next(), Some('+' | '-')) { if let Some(sign) = chars.next_if(|c| matches!(c, '+' | '-')) {
s.push(chars.next().unwrap()); s.push(sign);
} }
let mut saw_exp_digit = false; let mut saw_exp_digit = false;
while chars.clone().next().map_or(false, |c| c.is_ascii_digit()) { while let Some(c) = chars.next_if(|c| c.is_ascii_digit()) {
saw_exp_digit = true; saw_exp_digit = true;
s.push(chars.next().unwrap()); s.push(c);
} }
if !saw_exp_digit { if !saw_exp_digit {
// We didn't see any digits after the exponent. // We didn't see any digits after the exponent.
@ -66,9 +55,48 @@ where
s.truncate(len_before_exp); s.truncate(len_before_exp);
} }
} }
let res = s.parse::<f64>()?; let res = s.parse::<f64>().ok()?;
*consumed = s.len(); // note this is the number of chars because only ASCII is recognized. *consumed = s.len(); // note this is the number of chars because only ASCII is recognized.
Ok(res) Some(res)
}
#[cold]
#[inline(never)]
pub fn parse_inf_nan(
chars: impl Iterator<Item = char>,
sign: Option<u8>,
consumed: &mut usize,
) -> Option<f64> {
let mut chars = chars
.take_while(|c| c.is_ascii())
.map(|c| c.to_ascii_lowercase() as u8);
let (count, neg) = match sign {
None => (3, false),
Some(b'-') => (4, true),
_ => (4, false),
};
let [c1, c2, c3] = [chars.next()?, chars.next()?, chars.next()?];
// Using non-short-circuiting comparisons lets the compiler optimize it a bit more.
if (c1 == b'n') & (c2 == b'a') & (c3 == b'n') {
*consumed += count;
if !neg {
return Some(f64::NAN);
}
// LLVM understands this and returns f64::from_bits(0xFFF8000000000000) directly
return Some(f64::NAN.copysign(-1.0));
}
if (c1 == b'i') & (c2 == b'n') & (c3 == b'f') {
*consumed += count;
// "xyz".chars().all(..) inlines nicely while "xyz".chars().eq(chars.take(3)) doesn't.
if b"inity".iter().all(|c| Some(*c) == chars.next()) {
*consumed += 5;
}
if !neg {
return Some(f64::INFINITY);
}
return Some(f64::NEG_INFINITY);
}
return None;
} }
fn wcstod_inner<I>(mut chars: I, decimal_sep: char, consumed: &mut usize) -> Result<f64, Error> fn wcstod_inner<I>(mut chars: I, decimal_sep: char, consumed: &mut usize) -> Result<f64, Error>
@ -102,7 +130,7 @@ where
} }
let ret = parse_dec_float(chars.clone(), decimal_sep, consumed); let ret = parse_dec_float(chars.clone(), decimal_sep, consumed);
if ret.is_err() { if ret.is_none() {
*consumed = 0; *consumed = 0;
return Err(Error::InvalidChar); return Err(Error::InvalidChar);
} }