Slightly refactor unescape_string_xxx() functions

* Since we already have an allocation of length wstr.len(), it's
  probably better to allocate the result (whose length is always less
  than or equal to the input length) up-front rather than risk thrashing
  the Vec allocation,
* There's no need to compare c2 against '\0' since that will just cause
  to_digit(16) to return None anyway,
* Our convert_hex() specialization of to_digit(16) that only checks
  capital letters A-F without also checking lowercase a-f isn't
  significantly faster than just using to_digit(16), and we already assert
  that the input *wasn't* a lowercase a-f before making the call, so
  there's no point in using a special function to handle that.
This commit is contained in:
Mahmoud Al-Qudsi 2023-04-26 15:18:27 -05:00
parent c55ec59e22
commit 67124dfb11

View file

@ -687,10 +687,10 @@ fn unescape_string_internal(input: &wstr, flags: UnescapeFlags) -> Option<WStrin
Some(result) Some(result)
} }
/// Reverse the effects of `escape_string_url()`. By definition the string has consist of just ASCII /// Reverse the effects of `escape_string_url()`. By definition the input should consist of just
/// chars. /// ASCII chars.
fn unescape_string_url(input: &wstr) -> Option<WString> { fn unescape_string_url(input: &wstr) -> Option<WString> {
let mut result: Vec<u8> = vec![]; let mut result: Vec<u8> = Vec::with_capacity(input.len());
let mut i = 0; let mut i = 0;
while i < input.len() { while i < input.len() {
let c = input.char_at(i); let c = input.char_at(i);
@ -705,12 +705,9 @@ fn unescape_string_url(input: &wstr) -> Option<WString> {
result.push(b'%'); result.push(b'%');
i += 1; i += 1;
} else { } else {
let c2 = input.char_at(i + 2);
if c2 == '\0' {
return None; // string ended prematurely
}
let d1 = c1.to_digit(16)?; let d1 = c1.to_digit(16)?;
let d2 = c2.to_digit(16)?; let c2 = input.char_at(i + 2);
let d2 = c2.to_digit(16)?; // also fails if '\0' i.e. premature end
result.push((16 * d1 + d2) as u8); result.push((16 * d1 + d2) as u8);
i += 2; i += 2;
} }
@ -723,10 +720,10 @@ fn unescape_string_url(input: &wstr) -> Option<WString> {
Some(str2wcstring(&result)) Some(str2wcstring(&result))
} }
/// Reverse the effects of `escape_string_var()`. By definition the string has consist of just ASCII /// Reverse the effects of `escape_string_var()`. By definition the string should consist of just
/// chars. /// ASCII chars.
fn unescape_string_var(input: &wstr) -> Option<WString> { fn unescape_string_var(input: &wstr) -> Option<WString> {
let mut result: Vec<u8> = vec![]; let mut result: Vec<u8> = Vec::with_capacity(input.len());
let mut prev_was_hex_encoded = false; let mut prev_was_hex_encoded = false;
let mut i = 0; let mut i = 0;
while i < input.len() { while i < input.len() {
@ -741,17 +738,13 @@ fn unescape_string_var(input: &wstr) -> Option<WString> {
break; break;
} }
return None; // found unexpected escape char at end of string return None; // found unexpected escape char at end of string
} } else if c1 == '_' {
if c1 == '_' {
result.push(b'_'); result.push(b'_');
i += 1; i += 1;
} else if ('0'..='9').contains(&c1) || ('A'..='F').contains(&c1) { } else if ('0'..='9').contains(&c1) || ('A'..='F').contains(&c1) {
let d1 = c1.to_digit(16)?;
let c2 = input.char_at(i + 2); let c2 = input.char_at(i + 2);
if c2 == '\0' { let d2 = c2.to_digit(16)?; // also fails if '\0' i.e. premature end
return None; // string ended prematurely
}
let d1 = convert_hex_digit(c1)?;
let d2 = convert_hex_digit(c2)?;
result.push((16 * d1 + d2) as u8); result.push((16 * d1 + d2) as u8);
i += 2; i += 2;
prev_was_hex_encoded = true; prev_was_hex_encoded = true;
@ -946,18 +939,6 @@ pub fn read_unquoted_escape(
Some(in_pos) Some(in_pos)
} }
/// This is a specialization of `char::to_digit()` that only handles base 16 and only uppercase.
///
/// Returns `Some(n)` with `n` in `0..=15` when `d` is one of `'0'..='9'` or `'A'..='F'`, and
/// `None` for every other character (including lowercase `a`-`f`).
fn convert_hex_digit(d: char) -> Option<u32> {
    match d {
        '0'..='9' => Some(u32::from(d) - u32::from('0')),
        // Only A-F are hex digits; accepting G-Z would produce values above 15.
        'A'..='F' => Some(10 + u32::from(d) - u32::from('A')),
        _ => None,
    }
}
pub const fn char_offset(base: char, offset: u32) -> char { pub const fn char_offset(base: char, offset: u32) -> char {
match char::from_u32(base as u32 + offset) { match char::from_u32(base as u32 + offset) {
Some(c) => c, Some(c) => c,