From 28a3ae7a8b432792a578aac4e36ab3c775e6ba61 Mon Sep 17 00:00:00 2001 From: Mahmoud Al-Qudsi Date: Wed, 19 Jun 2024 18:43:53 -0500 Subject: [PATCH] Remove Clone bound on parse_dec_float() It's not necessary to clone the character iterator at all. Also move rarely used inf/nan parsing to own cold function. --- src/wutil/wcstod.rs | 100 ++++++++++++++++++++++++++++---------------- 1 file changed, 64 insertions(+), 36 deletions(-) diff --git a/src/wutil/wcstod.rs b/src/wutil/wcstod.rs index e4467a5a1..20d8efe90 100644 --- a/src/wutil/wcstod.rs +++ b/src/wutil/wcstod.rs @@ -1,47 +1,36 @@ use super::errors::Error; use super::hex_float; use crate::wchar::IntoCharIter; -use std::num::ParseFloatError; // Parse a decimal float from a sequence of characters. // Return the parsed float, and (on success) the number of characters consumed. -fn parse_dec_float( - mut chars: I, - decimal_sep: char, - consumed: &mut usize, -) -> Result +fn parse_dec_float(chars: I, decimal_sep: char, consumed: &mut usize) -> Option where - I: Iterator + Clone, + I: Iterator, { // This uses Rust's native float parsing and a temporary string. // The EBNF grammar is at https://doc.rust-lang.org/std/primitive.f64.html#method.from_str // Note it is case-insensitive and we replace the decimal separator with a period. let mut s = String::new(); - if matches!(chars.clone().next(), Some('+' | '-')) { - s.push(chars.next().unwrap()); + let mut chars = chars.peekable(); + if let Some(sign) = chars.next_if(|c| ['-', '+'].contains(c)) { + s.push(sign); } - for spec in ["infinity", "inf", "nan"] { - if chars - .clone() - .take(spec.len()) - .map(|c| c.to_ascii_lowercase()) - .eq(spec.chars()) - { - s.push_str(spec); - let res = s.parse::()?; - *consumed = s.len(); - return Ok(res); - } + if chars + .peek() + .map(|c| c.is_ascii_alphabetic()) + .unwrap_or(false) + { + return parse_inf_nan(chars, s.as_bytes().get(0).copied(), consumed); } - while chars.clone().next().map_or(false, |c| c.is_ascii_digit()) { - s.push(chars.next().unwrap()); + while let Some(c) = chars.next_if(|c| c.is_ascii_digit()) { + s.push(c); } - if chars.clone().next() == Some(decimal_sep) { - chars.next(); + if chars.next_if(|c| *c == decimal_sep).is_some() { s.push('.'); // Replace decimal separator with a period. - while chars.clone().next().map_or(false, |c| c.is_ascii_digit()) { - s.push(chars.next().unwrap()); + while let Some(c) = chars.next_if(|c| c.is_ascii_digit()) { + s.push(c); } } @@ -50,15 +39,15 @@ where // one digit after the decimal separator. Keep track of how many we have, // and the length before. let len_before_exp = s.len(); - if matches!(chars.clone().next(), Some('E' | 'e')) { - s.push(chars.next().unwrap()); - if matches!(chars.clone().next(), Some('+' | '-')) { - s.push(chars.next().unwrap()); + if let Some(e) = chars.next_if(|c| ['E', 'e'].contains(c)) { + s.push(e); + if let Some(sign) = chars.next_if(|c| matches!(c, '+' | '-')) { + s.push(sign); } let mut saw_exp_digit = false; - while chars.clone().next().map_or(false, |c| c.is_ascii_digit()) { + while let Some(c) = chars.next_if(|c| c.is_ascii_digit()) { saw_exp_digit = true; - s.push(chars.next().unwrap()); + s.push(c); } if !saw_exp_digit { // We didn't see any digits after the exponent. @@ -66,9 +55,48 @@ where s.truncate(len_before_exp); } } - let res = s.parse::()?; + let res = s.parse::().ok()?; *consumed = s.len(); // note this is the number of chars because only ASCII is recognized. - Ok(res) + Some(res) +} + +#[cold] +#[inline(never)] +pub fn parse_inf_nan( + chars: impl Iterator, + sign: Option, + consumed: &mut usize, +) -> Option { + let mut chars = chars + .take_while(|c| c.is_ascii()) + .map(|c| c.to_ascii_lowercase() as u8); + let (count, neg) = match sign { + None => (3, false), + Some(b'-') => (4, true), + _ => (4, false), + }; + let [c1, c2, c3] = [chars.next()?, chars.next()?, chars.next()?]; + // Using non-short-circuiting comparisons lets the compiler optimize it a bit more. + if (c1 == b'n') & (c2 == b'a') & (c3 == b'n') { + *consumed += count; + if !neg { + return Some(f64::NAN); + } + // LLVM understands this and returns f64::from_bits(0xFFF8000000000000) directly + return Some(f64::NAN.copysign(-1.0)); + } + if (c1 == b'i') & (c2 == b'n') & (c3 == b'f') { + *consumed += count; + // "xyz".chars().all(..) inlines nicely while "xyz".chars().eq(chars.take(3)) doesn't. + if b"inity".iter().all(|c| Some(*c) == chars.next()) { + *consumed += 5; + } + if !neg { + return Some(f64::INFINITY); + } + return Some(f64::NEG_INFINITY); + } + return None; } fn wcstod_inner(mut chars: I, decimal_sep: char, consumed: &mut usize) -> Result @@ -102,7 +130,7 @@ where } let ret = parse_dec_float(chars.clone(), decimal_sep, consumed); - if ret.is_err() { + if ret.is_none() { *consumed = 0; return Err(Error::InvalidChar); }