Rework CStrUnit.

- Rename it as `MixedUnit`, because it will soon be used in more than just C string literals.
- Change the `Byte` variant to `HighByte` and use it only for `\x80`..`\xff` cases. This fixes the old inexactness where ASCII chars could be encoded with either `Byte` or `Char`.
- Add useful comments.
- Remove `is_ascii`, in favour of `u8::is_ascii`.
2024-12-26 13:03:31 +00:00 · 2024-01-23 12:27:56 +11:00 · 2024-01-23 12:27:56 +11:00 · 56514076ac
commit 56514076ac
parent 85d56eeb63
1 changed file with 4 additions and 5 deletions
--- a/crates/syntax/src/ast/token_ext.rs
+++ b/crates/syntax/src/ast/token_ext.rs
@@ -6,7 +6,7 @@ use std::{
 };

 use rustc_lexer::unescape::{
-    unescape_byte, unescape_c_string, unescape_char, unescape_literal, CStrUnit, Mode,
+    unescape_byte, unescape_c_string, unescape_char, unescape_literal, MixedUnit, Mode,
 };

 use crate::{
@@ -336,10 +336,9 @@ impl ast::CString {
        let mut buf = Vec::new();
        let mut prev_end = 0;
        let mut has_error = false;
-        let mut char_buf = [0u8; 4];
-        let mut extend_unit = |buf: &mut Vec<u8>, unit: CStrUnit| match unit {
-            CStrUnit::Byte(b) => buf.push(b),
-            CStrUnit::Char(c) => buf.extend(c.encode_utf8(&mut char_buf).as_bytes()),
+        let extend_unit = |buf: &mut Vec<u8>, unit: MixedUnit| match unit {
+            MixedUnit::Char(c) => buf.extend(c.encode_utf8(&mut [0; 4]).as_bytes()),
+            MixedUnit::HighByte(b) => buf.push(b),
        };
        unescape_c_string(text, Self::MODE, &mut |char_range, unescaped| match (
            unescaped,