mirror of
https://github.com/rust-lang/rust-analyzer
synced 2024-12-27 05:23:24 +00:00
Fix unescaping of C string literals
This commit is contained in:
parent
de1f766820
commit
59a3e42ac9
6 changed files with 49 additions and 21 deletions
|
@ -634,7 +634,7 @@ impl Printer<'_> {
|
||||||
match literal {
|
match literal {
|
||||||
Literal::String(it) => w!(self, "{:?}", it),
|
Literal::String(it) => w!(self, "{:?}", it),
|
||||||
Literal::ByteString(it) => w!(self, "\"{}\"", it.escape_ascii()),
|
Literal::ByteString(it) => w!(self, "\"{}\"", it.escape_ascii()),
|
||||||
Literal::CString(it) => w!(self, "\"{}\\0\"", it),
|
Literal::CString(it) => w!(self, "\"{}\\0\"", it.escape_ascii()),
|
||||||
Literal::Char(it) => w!(self, "'{}'", it.escape_debug()),
|
Literal::Char(it) => w!(self, "'{}'", it.escape_debug()),
|
||||||
Literal::Bool(it) => w!(self, "{}", it),
|
Literal::Bool(it) => w!(self, "{}", it),
|
||||||
Literal::Int(i, suffix) => {
|
Literal::Int(i, suffix) => {
|
||||||
|
|
|
@ -85,7 +85,7 @@ impl fmt::Display for FloatTypeWrapper {
|
||||||
pub enum Literal {
|
pub enum Literal {
|
||||||
String(Box<str>),
|
String(Box<str>),
|
||||||
ByteString(Box<[u8]>),
|
ByteString(Box<[u8]>),
|
||||||
CString(Box<str>),
|
CString(Box<[u8]>),
|
||||||
Char(char),
|
Char(char),
|
||||||
Bool(bool),
|
Bool(bool),
|
||||||
Int(i128, Option<BuiltinInt>),
|
Int(i128, Option<BuiltinInt>),
|
||||||
|
|
|
@ -1355,7 +1355,6 @@ impl<'ctx> MirLowerCtx<'ctx> {
|
||||||
return Ok(Operand::from_concrete_const(data, mm, ty));
|
return Ok(Operand::from_concrete_const(data, mm, ty));
|
||||||
}
|
}
|
||||||
hir_def::hir::Literal::CString(b) => {
|
hir_def::hir::Literal::CString(b) => {
|
||||||
let b = b.as_bytes();
|
|
||||||
let bytes = b.iter().copied().chain(iter::once(0)).collect::<Vec<_>>();
|
let bytes = b.iter().copied().chain(iter::once(0)).collect::<Vec<_>>();
|
||||||
|
|
||||||
let mut data = Vec::with_capacity(mem::size_of::<usize>() * 2);
|
let mut data = Vec::with_capacity(mem::size_of::<usize>() * 2);
|
||||||
|
|
|
@ -161,8 +161,9 @@ pre { color: #DCDCCC; background: #3F3F3F; font-size: 22px; padd
|
||||||
<span class="macro">println</span><span class="macro_bang">!</span><span class="parenthesis macro">(</span><span class="string_literal macro">"Hello</span><span class="escape_sequence">\n</span><span class="string_literal macro">World"</span><span class="parenthesis macro">)</span><span class="semicolon">;</span>
|
<span class="macro">println</span><span class="macro_bang">!</span><span class="parenthesis macro">(</span><span class="string_literal macro">"Hello</span><span class="escape_sequence">\n</span><span class="string_literal macro">World"</span><span class="parenthesis macro">)</span><span class="semicolon">;</span>
|
||||||
<span class="macro">println</span><span class="macro_bang">!</span><span class="parenthesis macro">(</span><span class="string_literal macro">"</span><span class="escape_sequence">\u{48}</span><span class="escape_sequence">\x65</span><span class="escape_sequence">\x6C</span><span class="escape_sequence">\x6C</span><span class="escape_sequence">\x6F</span><span class="string_literal macro"> World"</span><span class="parenthesis macro">)</span><span class="semicolon">;</span>
|
<span class="macro">println</span><span class="macro_bang">!</span><span class="parenthesis macro">(</span><span class="string_literal macro">"</span><span class="escape_sequence">\u{48}</span><span class="escape_sequence">\x65</span><span class="escape_sequence">\x6C</span><span class="escape_sequence">\x6C</span><span class="escape_sequence">\x6F</span><span class="string_literal macro"> World"</span><span class="parenthesis macro">)</span><span class="semicolon">;</span>
|
||||||
|
|
||||||
<span class="keyword">let</span> <span class="punctuation">_</span> <span class="operator">=</span> <span class="string_literal">"</span><span class="escape_sequence">\x28</span><span class="escape_sequence">\x28</span><span class="escape_sequence">\x00</span><span class="escape_sequence">\x63</span><span class="string_literal">\xFF</span><span class="escape_sequence">\n</span><span class="string_literal">"</span><span class="semicolon">;</span> <span class="comment">// invalid non-UTF8 escape sequences</span>
|
<span class="keyword">let</span> <span class="punctuation">_</span> <span class="operator">=</span> <span class="string_literal">"</span><span class="escape_sequence">\x28</span><span class="escape_sequence">\x28</span><span class="escape_sequence">\x00</span><span class="escape_sequence">\x63</span><span class="string_literal">\xFF</span><span class="escape_sequence">\u{FF}</span><span class="escape_sequence">\n</span><span class="string_literal">"</span><span class="semicolon">;</span> <span class="comment">// invalid non-UTF8 escape sequences</span>
|
||||||
<span class="keyword">let</span> <span class="punctuation">_</span> <span class="operator">=</span> <span class="string_literal">b"</span><span class="escape_sequence">\x28</span><span class="escape_sequence">\x28</span><span class="escape_sequence">\x00</span><span class="escape_sequence">\x63</span><span class="escape_sequence">\xFF</span><span class="escape_sequence">\n</span><span class="string_literal">"</span><span class="semicolon">;</span> <span class="comment">// valid bytes</span>
|
<span class="keyword">let</span> <span class="punctuation">_</span> <span class="operator">=</span> <span class="string_literal">b"</span><span class="escape_sequence">\x28</span><span class="escape_sequence">\x28</span><span class="escape_sequence">\x00</span><span class="escape_sequence">\x63</span><span class="escape_sequence">\xFF</span><span class="string_literal">\u{FF}</span><span class="escape_sequence">\n</span><span class="string_literal">"</span><span class="semicolon">;</span> <span class="comment">// valid bytes, invalid unicodes</span>
|
||||||
|
<span class="keyword">let</span> <span class="punctuation">_</span> <span class="operator">=</span> <span class="string_literal">c"</span><span class="escape_sequence">\u{FF}</span><span class="escape_sequence">\xFF</span><span class="string_literal">"</span><span class="semicolon">;</span> <span class="comment">// valid bytes, valid unicodes</span>
|
||||||
<span class="keyword">let</span> <span class="variable declaration reference">backslash</span> <span class="operator">=</span> <span class="string_literal">r"\\"</span><span class="semicolon">;</span>
|
<span class="keyword">let</span> <span class="variable declaration reference">backslash</span> <span class="operator">=</span> <span class="string_literal">r"\\"</span><span class="semicolon">;</span>
|
||||||
|
|
||||||
<span class="macro">println</span><span class="macro_bang">!</span><span class="parenthesis macro">(</span><span class="string_literal macro">"</span><span class="format_specifier">{</span><span class="escape_sequence">\x41</span><span class="format_specifier">}</span><span class="string_literal macro">"</span><span class="comma macro">,</span> <span class="none macro">A</span> <span class="operator macro">=</span> <span class="numeric_literal macro">92</span><span class="parenthesis macro">)</span><span class="semicolon">;</span>
|
<span class="macro">println</span><span class="macro_bang">!</span><span class="parenthesis macro">(</span><span class="string_literal macro">"</span><span class="format_specifier">{</span><span class="escape_sequence">\x41</span><span class="format_specifier">}</span><span class="string_literal macro">"</span><span class="comma macro">,</span> <span class="none macro">A</span> <span class="operator macro">=</span> <span class="numeric_literal macro">92</span><span class="parenthesis macro">)</span><span class="semicolon">;</span>
|
||||||
|
|
|
@ -507,8 +507,9 @@ fn main() {
|
||||||
println!("Hello\nWorld");
|
println!("Hello\nWorld");
|
||||||
println!("\u{48}\x65\x6C\x6C\x6F World");
|
println!("\u{48}\x65\x6C\x6C\x6F World");
|
||||||
|
|
||||||
let _ = "\x28\x28\x00\x63\xFF\n"; // invalid non-UTF8 escape sequences
|
let _ = "\x28\x28\x00\x63\xFF\u{FF}\n"; // invalid non-UTF8 escape sequences
|
||||||
let _ = b"\x28\x28\x00\x63\xFF\n"; // valid bytes
|
let _ = b"\x28\x28\x00\x63\xFF\u{FF}\n"; // valid bytes, invalid unicodes
|
||||||
|
let _ = c"\u{FF}\xFF"; // valid bytes, valid unicodes
|
||||||
let backslash = r"\\";
|
let backslash = r"\\";
|
||||||
|
|
||||||
println!("{\x41}", A = 92);
|
println!("{\x41}", A = 92);
|
||||||
|
|
|
@ -2,7 +2,9 @@
|
||||||
|
|
||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
|
|
||||||
use rustc_lexer::unescape::{unescape_byte, unescape_char, unescape_literal, Mode};
|
use rustc_lexer::unescape::{
|
||||||
|
unescape_byte, unescape_c_string, unescape_char, unescape_literal, CStrUnit, Mode,
|
||||||
|
};
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
ast::{self, AstToken},
|
ast::{self, AstToken},
|
||||||
|
@ -285,45 +287,70 @@ impl ast::ByteString {
|
||||||
|
|
||||||
impl IsString for ast::CString {
|
impl IsString for ast::CString {
|
||||||
const RAW_PREFIX: &'static str = "cr";
|
const RAW_PREFIX: &'static str = "cr";
|
||||||
// XXX: `Mode::CStr` is not supported by `unescape_literal` of ra-ap-rustc_lexer yet.
|
const MODE: Mode = Mode::CStr;
|
||||||
// Here we pretend it to be a byte string.
|
|
||||||
const MODE: Mode = Mode::ByteStr;
|
fn escaped_char_ranges(
|
||||||
|
&self,
|
||||||
|
cb: &mut dyn FnMut(TextRange, Result<char, rustc_lexer::unescape::EscapeError>),
|
||||||
|
) {
|
||||||
|
let text_range_no_quotes = match self.text_range_between_quotes() {
|
||||||
|
Some(it) => it,
|
||||||
|
None => return,
|
||||||
|
};
|
||||||
|
|
||||||
|
let start = self.syntax().text_range().start();
|
||||||
|
let text = &self.text()[text_range_no_quotes - start];
|
||||||
|
let offset = text_range_no_quotes.start() - start;
|
||||||
|
|
||||||
|
unescape_c_string(text, Self::MODE, &mut |range, unescaped_char| {
|
||||||
|
let text_range =
|
||||||
|
TextRange::new(range.start.try_into().unwrap(), range.end.try_into().unwrap());
|
||||||
|
// XXX: This method should only be used for highlighting ranges. The unescaped
|
||||||
|
// char/byte is not used. For simplicity, we return an arbitrary placeholder char.
|
||||||
|
cb(text_range + offset, unescaped_char.map(|_| ' '));
|
||||||
|
});
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ast::CString {
|
impl ast::CString {
|
||||||
pub fn value(&self) -> Option<Cow<'_, str>> {
|
pub fn value(&self) -> Option<Cow<'_, [u8]>> {
|
||||||
if self.is_raw() {
|
if self.is_raw() {
|
||||||
let text = self.text();
|
let text = self.text();
|
||||||
let text =
|
let text =
|
||||||
&text[self.text_range_between_quotes()? - self.syntax().text_range().start()];
|
&text[self.text_range_between_quotes()? - self.syntax().text_range().start()];
|
||||||
return Some(Cow::Borrowed(text));
|
return Some(Cow::Borrowed(text.as_bytes()));
|
||||||
}
|
}
|
||||||
|
|
||||||
let text = self.text();
|
let text = self.text();
|
||||||
let text = &text[self.text_range_between_quotes()? - self.syntax().text_range().start()];
|
let text = &text[self.text_range_between_quotes()? - self.syntax().text_range().start()];
|
||||||
|
|
||||||
let mut buf = String::new();
|
let mut buf = Vec::new();
|
||||||
let mut prev_end = 0;
|
let mut prev_end = 0;
|
||||||
let mut has_error = false;
|
let mut has_error = false;
|
||||||
unescape_literal(text, Self::MODE, &mut |char_range, unescaped_char| match (
|
let mut char_buf = [0u8; 4];
|
||||||
unescaped_char,
|
let mut extend_unit = |buf: &mut Vec<u8>, unit: CStrUnit| match unit {
|
||||||
|
CStrUnit::Byte(b) => buf.push(b),
|
||||||
|
CStrUnit::Char(c) => buf.extend(c.encode_utf8(&mut char_buf).as_bytes()),
|
||||||
|
};
|
||||||
|
unescape_c_string(text, Self::MODE, &mut |char_range, unescaped| match (
|
||||||
|
unescaped,
|
||||||
buf.capacity() == 0,
|
buf.capacity() == 0,
|
||||||
) {
|
) {
|
||||||
(Ok(c), false) => buf.push(c),
|
(Ok(u), false) => extend_unit(&mut buf, u),
|
||||||
(Ok(_), true) if char_range.len() == 1 && char_range.start == prev_end => {
|
(Ok(_), true) if char_range.len() == 1 && char_range.start == prev_end => {
|
||||||
prev_end = char_range.end
|
prev_end = char_range.end
|
||||||
}
|
}
|
||||||
(Ok(c), true) => {
|
(Ok(u), true) => {
|
||||||
buf.reserve_exact(text.len());
|
buf.reserve_exact(text.len());
|
||||||
buf.push_str(&text[..prev_end]);
|
buf.extend(text[..prev_end].as_bytes());
|
||||||
buf.push(c);
|
extend_unit(&mut buf, u);
|
||||||
}
|
}
|
||||||
(Err(_), _) => has_error = true,
|
(Err(_), _) => has_error = true,
|
||||||
});
|
});
|
||||||
|
|
||||||
match (has_error, buf.capacity() == 0) {
|
match (has_error, buf.capacity() == 0) {
|
||||||
(true, _) => None,
|
(true, _) => None,
|
||||||
(false, true) => Some(Cow::Borrowed(text)),
|
(false, true) => Some(Cow::Borrowed(text.as_bytes())),
|
||||||
(false, false) => Some(Cow::Owned(buf)),
|
(false, false) => Some(Cow::Owned(buf)),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue