Adapt format specifier highlighting to support escaped sequences and Unicode identifiers

This commit is contained in:
Leander Tentrup 2020-04-22 15:28:35 +02:00
parent b2829a5216
commit 445052f6d4
4 changed files with 208 additions and 143 deletions

View file

@ -74,4 +74,9 @@ pre { color: #DCDCCC; background: #3F3F3F; font-size: 22px; padd
<span class="macro">println!</span>(<span class="string_literal">"</span><span class="attribute">{</span><span class="attribute">}</span><span class="string_literal">, `</span><span class="attribute">{</span><span class="variable">name</span><span class="attribute">:</span><span class="attribute">&gt;</span><span class="numeric_literal">8</span><span class="attribute">.</span><span class="attribute">*</span><span class="attribute">}</span><span class="string_literal">` has 3 right-aligned characters"</span>, <span class="string_literal">"Hello"</span>, <span class="numeric_literal">3</span>, name=<span class="string_literal">"1234.56"</span>); <span class="macro">println!</span>(<span class="string_literal">"</span><span class="attribute">{</span><span class="attribute">}</span><span class="string_literal">, `</span><span class="attribute">{</span><span class="variable">name</span><span class="attribute">:</span><span class="attribute">&gt;</span><span class="numeric_literal">8</span><span class="attribute">.</span><span class="attribute">*</span><span class="attribute">}</span><span class="string_literal">` has 3 right-aligned characters"</span>, <span class="string_literal">"Hello"</span>, <span class="numeric_literal">3</span>, name=<span class="string_literal">"1234.56"</span>);
<span class="macro">println!</span>(<span class="string_literal">"Hello {{}}"</span>); <span class="macro">println!</span>(<span class="string_literal">"Hello {{}}"</span>);
<span class="macro">println!</span>(<span class="string_literal">"{{ Hello"</span>); <span class="macro">println!</span>(<span class="string_literal">"{{ Hello"</span>);
<span class="macro">println!</span>(<span class="string_literal">r"Hello, </span><span class="attribute">{</span><span class="attribute">}</span><span class="string_literal">!"</span>, <span class="string_literal">"world"</span>);
<span class="macro">println!</span>(<span class="string_literal">"</span><span class="attribute">{</span><span class="variable">\x41</span><span class="attribute">}</span><span class="string_literal">"</span>, A = <span class="numeric_literal">92</span>);
<span class="macro">println!</span>(<span class="string_literal">"</span><span class="attribute">{</span><span class="variable">ничоси</span><span class="attribute">}</span><span class="string_literal">"</span>, ничоси = <span class="numeric_literal">92</span>);
}</code></pre> }</code></pre>

View file

@ -245,28 +245,29 @@ pub(crate) fn highlight(
stack.push(); stack.push();
if is_format_string { if is_format_string {
string.lex_format_specifier(|piece_range, kind| { string.lex_format_specifier(|piece_range, kind| {
let highlight = match kind { if let Some(highlight) = highlight_format_specifier(kind) {
FormatSpecifier::Open stack.add(HighlightedRange {
| FormatSpecifier::Close range: piece_range + range.start(),
| FormatSpecifier::Colon highlight: highlight.into(),
| FormatSpecifier::Fill binding_hash: None,
| FormatSpecifier::Align });
| FormatSpecifier::Sign }
| FormatSpecifier::NumberSign });
| FormatSpecifier::DollarSign }
| FormatSpecifier::Dot stack.pop();
| FormatSpecifier::Asterisk } else if let Some(string) =
| FormatSpecifier::QuestionMark => HighlightTag::Attribute, element_to_highlight.as_token().cloned().and_then(ast::RawString::cast)
FormatSpecifier::Integer | FormatSpecifier::Zero => { {
HighlightTag::NumericLiteral stack.push();
} if is_format_string {
FormatSpecifier::Identifier => HighlightTag::Local, string.lex_format_specifier(|piece_range, kind| {
}; if let Some(highlight) = highlight_format_specifier(kind) {
stack.add(HighlightedRange { stack.add(HighlightedRange {
range: piece_range + range.start(), range: piece_range + range.start(),
highlight: highlight.into(), highlight: highlight.into(),
binding_hash: None, binding_hash: None,
}); });
}
}); });
} }
stack.pop(); stack.pop();
@ -277,6 +278,24 @@ pub(crate) fn highlight(
stack.flattened() stack.flattened()
} }
/// Maps a lexed piece of a format specifier to the tag used to highlight it.
///
/// Identifiers are tagged as `Local`, numeric pieces (`Integer`, `Zero`) as
/// `NumericLiteral`, and all punctuation-like pieces as `Attribute`. Every
/// variant handled here yields a tag, so the result is currently always `Some`.
fn highlight_format_specifier(kind: FormatSpecifier) -> Option<HighlightTag> {
    let tag = match kind {
        FormatSpecifier::Identifier => HighlightTag::Local,
        FormatSpecifier::Integer | FormatSpecifier::Zero => HighlightTag::NumericLiteral,
        // Everything else is structural punctuation inside `{...}`.
        FormatSpecifier::Open
        | FormatSpecifier::Close
        | FormatSpecifier::Colon
        | FormatSpecifier::Fill
        | FormatSpecifier::Align
        | FormatSpecifier::Sign
        | FormatSpecifier::NumberSign
        | FormatSpecifier::DollarSign
        | FormatSpecifier::Dot
        | FormatSpecifier::Asterisk
        | FormatSpecifier::QuestionMark => HighlightTag::Attribute,
    };
    Some(tag)
}
fn macro_call_range(macro_call: &ast::MacroCall) -> Option<TextRange> { fn macro_call_range(macro_call: &ast::MacroCall) -> Option<TextRange> {
let path = macro_call.path()?; let path = macro_call.path()?;
let name_ref = path.segment()?.name_ref()?; let name_ref = path.segment()?.name_ref()?;

View file

@ -223,6 +223,11 @@ fn main() {
println!("{}, `{name:>8.*}` has 3 right-aligned characters", "Hello", 3, name="1234.56"); println!("{}, `{name:>8.*}` has 3 right-aligned characters", "Hello", 3, name="1234.56");
println!("Hello {{}}"); println!("Hello {{}}");
println!("{{ Hello"); println!("{{ Hello");
println!(r"Hello, {}!", "world");
println!("{\x41}", A = 92);
println!("{ничоси}", ничоси = 92);
}"# }"#
.trim(), .trim(),
); );

View file

@ -192,68 +192,76 @@ pub enum FormatSpecifier {
} }
pub trait HasFormatSpecifier: AstToken { pub trait HasFormatSpecifier: AstToken {
fn char_ranges(
&self,
) -> Option<Vec<(TextRange, Result<char, rustc_lexer::unescape::EscapeError>)>>;
fn lex_format_specifier<F>(&self, mut callback: F) fn lex_format_specifier<F>(&self, mut callback: F)
where where
F: FnMut(TextRange, FormatSpecifier), F: FnMut(TextRange, FormatSpecifier),
{ {
let src = self.text().as_str(); let char_ranges = if let Some(char_ranges) = self.char_ranges() {
let initial_len = src.len(); char_ranges
let mut chars = src.chars(); } else {
return;
};
let mut chars = char_ranges.iter().peekable();
while let Some(first_char) = chars.next() { while let Some((range, first_char)) = chars.next() {
match first_char { match first_char {
'{' => { Ok('{') => {
// Format specifier, see syntax at https://doc.rust-lang.org/std/fmt/index.html#syntax // Format specifier, see syntax at https://doc.rust-lang.org/std/fmt/index.html#syntax
if chars.clone().next() == Some('{') { if let Some((_, Ok('{'))) = chars.peek() {
// Escaped format specifier, `{{` // Escaped format specifier, `{{`
chars.next(); chars.next();
continue; continue;
} }
let start = initial_len - chars.as_str().len() - first_char.len_utf8(); callback(*range, FormatSpecifier::Open);
let end = initial_len - chars.as_str().len();
callback(
TextRange::from_to(TextUnit::from_usize(start), TextUnit::from_usize(end)),
FormatSpecifier::Open,
);
// check for integer/identifier // check for integer/identifier
match chars.clone().next().unwrap_or_default() { match chars
.peek()
.and_then(|next| next.1.as_ref().ok())
.copied()
.unwrap_or_default()
{
'0'..='9' => { '0'..='9' => {
// integer // integer
read_integer(&mut chars, initial_len, &mut callback); read_integer(&mut chars, &mut callback);
} }
'a'..='z' | 'A'..='Z' | '_' => { c if c == '_' || c.is_alphabetic() => {
// identifier // identifier
read_identifier(&mut chars, initial_len, &mut callback); read_identifier(&mut chars, &mut callback);
} }
_ => {} _ => {}
} }
if chars.clone().next() == Some(':') { if let Some((_, Ok(':'))) = chars.peek() {
skip_char_and_emit( skip_char_and_emit(&mut chars, FormatSpecifier::Colon, &mut callback);
&mut chars,
initial_len,
FormatSpecifier::Colon,
&mut callback,
);
// check for fill/align // check for fill/align
let mut cloned = chars.clone().take(2); let mut cloned = chars.clone().take(2);
let first = cloned.next().unwrap_or_default(); let first = cloned
let second = cloned.next().unwrap_or_default(); .next()
.and_then(|next| next.1.as_ref().ok())
.copied()
.unwrap_or_default();
let second = cloned
.next()
.and_then(|next| next.1.as_ref().ok())
.copied()
.unwrap_or_default();
match second { match second {
'<' | '^' | '>' => { '<' | '^' | '>' => {
// alignment specifier, first char specifies fillment // alignment specifier, first char specifies fillment
skip_char_and_emit( skip_char_and_emit(
&mut chars, &mut chars,
initial_len,
FormatSpecifier::Fill, FormatSpecifier::Fill,
&mut callback, &mut callback,
); );
skip_char_and_emit( skip_char_and_emit(
&mut chars, &mut chars,
initial_len,
FormatSpecifier::Align, FormatSpecifier::Align,
&mut callback, &mut callback,
); );
@ -262,7 +270,6 @@ pub trait HasFormatSpecifier: AstToken {
'<' | '^' | '>' => { '<' | '^' | '>' => {
skip_char_and_emit( skip_char_and_emit(
&mut chars, &mut chars,
initial_len,
FormatSpecifier::Align, FormatSpecifier::Align,
&mut callback, &mut callback,
); );
@ -272,11 +279,15 @@ pub trait HasFormatSpecifier: AstToken {
} }
// check for sign // check for sign
match chars.clone().next().unwrap_or_default() { match chars
.peek()
.and_then(|next| next.1.as_ref().ok())
.copied()
.unwrap_or_default()
{
'+' | '-' => { '+' | '-' => {
skip_char_and_emit( skip_char_and_emit(
&mut chars, &mut chars,
initial_len,
FormatSpecifier::Sign, FormatSpecifier::Sign,
&mut callback, &mut callback,
); );
@ -285,10 +296,9 @@ pub trait HasFormatSpecifier: AstToken {
} }
// check for `#` // check for `#`
if let Some('#') = chars.clone().next() { if let Some((_, Ok('#'))) = chars.peek() {
skip_char_and_emit( skip_char_and_emit(
&mut chars, &mut chars,
initial_len,
FormatSpecifier::NumberSign, FormatSpecifier::NumberSign,
&mut callback, &mut callback,
); );
@ -296,39 +306,39 @@ pub trait HasFormatSpecifier: AstToken {
// check for `0` // check for `0`
let mut cloned = chars.clone().take(2); let mut cloned = chars.clone().take(2);
let first = cloned.next(); let first = cloned.next().and_then(|next| next.1.as_ref().ok()).copied();
let second = cloned.next(); let second = cloned.next().and_then(|next| next.1.as_ref().ok()).copied();
if first == Some('0') && second != Some('$') { if first == Some('0') && second != Some('$') {
skip_char_and_emit( skip_char_and_emit(&mut chars, FormatSpecifier::Zero, &mut callback);
&mut chars,
initial_len,
FormatSpecifier::Zero,
&mut callback,
);
} }
// width // width
match chars.clone().next().unwrap_or_default() { match chars
.peek()
.and_then(|next| next.1.as_ref().ok())
.copied()
.unwrap_or_default()
{
'0'..='9' => { '0'..='9' => {
read_integer(&mut chars, initial_len, &mut callback); read_integer(&mut chars, &mut callback);
if chars.clone().next() == Some('$') { if let Some((_, Ok('$'))) = chars.peek() {
skip_char_and_emit( skip_char_and_emit(
&mut chars, &mut chars,
initial_len,
FormatSpecifier::DollarSign, FormatSpecifier::DollarSign,
&mut callback, &mut callback,
); );
} }
} }
'a'..='z' | 'A'..='Z' | '_' => { c if c == '_' || c.is_alphabetic() => {
read_identifier(&mut chars, initial_len, &mut callback); read_identifier(&mut chars, &mut callback);
if chars.clone().next() != Some('$') { if chars.peek().and_then(|next| next.1.as_ref().ok()).copied()
!= Some('$')
{
continue; continue;
} }
skip_char_and_emit( skip_char_and_emit(
&mut chars, &mut chars,
initial_len,
FormatSpecifier::DollarSign, FormatSpecifier::DollarSign,
&mut callback, &mut callback,
); );
@ -337,42 +347,41 @@ pub trait HasFormatSpecifier: AstToken {
} }
// precision // precision
if chars.clone().next() == Some('.') { if let Some((_, Ok('.'))) = chars.peek() {
skip_char_and_emit( skip_char_and_emit(&mut chars, FormatSpecifier::Dot, &mut callback);
&mut chars,
initial_len,
FormatSpecifier::Dot,
&mut callback,
);
match chars.clone().next().unwrap_or_default() { match chars
.peek()
.and_then(|next| next.1.as_ref().ok())
.copied()
.unwrap_or_default()
{
'*' => { '*' => {
skip_char_and_emit( skip_char_and_emit(
&mut chars, &mut chars,
initial_len,
FormatSpecifier::Asterisk, FormatSpecifier::Asterisk,
&mut callback, &mut callback,
); );
} }
'0'..='9' => { '0'..='9' => {
read_integer(&mut chars, initial_len, &mut callback); read_integer(&mut chars, &mut callback);
if chars.clone().next() == Some('$') { if let Some((_, Ok('$'))) = chars.peek() {
skip_char_and_emit( skip_char_and_emit(
&mut chars, &mut chars,
initial_len,
FormatSpecifier::DollarSign, FormatSpecifier::DollarSign,
&mut callback, &mut callback,
); );
} }
} }
'a'..='z' | 'A'..='Z' | '_' => { c if c == '_' || c.is_alphabetic() => {
read_identifier(&mut chars, initial_len, &mut callback); read_identifier(&mut chars, &mut callback);
if chars.clone().next() != Some('$') { if chars.peek().and_then(|next| next.1.as_ref().ok()).copied()
!= Some('$')
{
continue; continue;
} }
skip_char_and_emit( skip_char_and_emit(
&mut chars, &mut chars,
initial_len,
FormatSpecifier::DollarSign, FormatSpecifier::DollarSign,
&mut callback, &mut callback,
); );
@ -384,25 +393,29 @@ pub trait HasFormatSpecifier: AstToken {
} }
// type // type
match chars.clone().next().unwrap_or_default() { match chars
.peek()
.and_then(|next| next.1.as_ref().ok())
.copied()
.unwrap_or_default()
{
'?' => { '?' => {
skip_char_and_emit( skip_char_and_emit(
&mut chars, &mut chars,
initial_len,
FormatSpecifier::QuestionMark, FormatSpecifier::QuestionMark,
&mut callback, &mut callback,
); );
} }
'a'..='z' | 'A'..='Z' | '_' => { c if c == '_' || c.is_alphabetic() => {
read_identifier(&mut chars, initial_len, &mut callback); read_identifier(&mut chars, &mut callback);
} }
_ => {} _ => {}
} }
} }
let mut cloned = chars.clone().take(2); let mut cloned = chars.clone().take(2);
let first = cloned.next(); let first = cloned.next().and_then(|next| next.1.as_ref().ok()).copied();
let second = cloned.next(); let second = cloned.next().and_then(|next| next.1.as_ref().ok()).copied();
if first != Some('}') { if first != Some('}') {
continue; continue;
} }
@ -410,15 +423,10 @@ pub trait HasFormatSpecifier: AstToken {
// Escaped format end specifier, `}}` // Escaped format end specifier, `}}`
continue; continue;
} }
skip_char_and_emit( skip_char_and_emit(&mut chars, FormatSpecifier::Close, &mut callback);
&mut chars,
initial_len,
FormatSpecifier::Close,
&mut callback,
);
} }
_ => { _ => {
while let Some(next_char) = chars.clone().next() { while let Some((_, Ok(next_char))) = chars.peek() {
match next_char { match next_char {
'{' => break, '{' => break,
_ => {} _ => {}
@ -429,69 +437,97 @@ pub trait HasFormatSpecifier: AstToken {
}; };
} }
fn skip_char_and_emit<F>( fn skip_char_and_emit<'a, I, F>(
chars: &mut std::str::Chars, chars: &mut std::iter::Peekable<I>,
initial_len: usize,
emit: FormatSpecifier, emit: FormatSpecifier,
callback: &mut F, callback: &mut F,
) where ) where
I: Iterator<Item = &'a (TextRange, Result<char, rustc_lexer::unescape::EscapeError>)>,
F: FnMut(TextRange, FormatSpecifier), F: FnMut(TextRange, FormatSpecifier),
{ {
let start = initial_len - chars.as_str().len(); let (range, _) = chars.next().unwrap();
chars.next(); callback(*range, emit);
let end = initial_len - chars.as_str().len();
callback(
TextRange::from_to(TextUnit::from_usize(start), TextUnit::from_usize(end)),
emit,
);
} }
fn read_integer<F>(chars: &mut std::str::Chars, initial_len: usize, callback: &mut F) fn read_integer<'a, I, F>(chars: &mut std::iter::Peekable<I>, callback: &mut F)
where where
I: Iterator<Item = &'a (TextRange, Result<char, rustc_lexer::unescape::EscapeError>)>,
F: FnMut(TextRange, FormatSpecifier), F: FnMut(TextRange, FormatSpecifier),
{ {
let start = initial_len - chars.as_str().len(); let (mut range, c) = chars.next().unwrap();
chars.next(); assert!(c.as_ref().unwrap().is_ascii_digit());
while let Some(next_char) = chars.clone().next() { while let Some((r, Ok(next_char))) = chars.peek() {
match next_char { if next_char.is_ascii_digit() {
'0'..='9' => { chars.next();
chars.next(); range = range.extend_to(r);
} } else {
_ => { break;
break;
}
} }
} }
let end = initial_len - chars.as_str().len(); callback(range, FormatSpecifier::Integer);
callback(
TextRange::from_to(TextUnit::from_usize(start), TextUnit::from_usize(end)),
FormatSpecifier::Integer,
);
} }
fn read_identifier<F>(chars: &mut std::str::Chars, initial_len: usize, callback: &mut F)
fn read_identifier<'a, I, F>(chars: &mut std::iter::Peekable<I>, callback: &mut F)
where where
I: Iterator<Item = &'a (TextRange, Result<char, rustc_lexer::unescape::EscapeError>)>,
F: FnMut(TextRange, FormatSpecifier), F: FnMut(TextRange, FormatSpecifier),
{ {
let start = initial_len - chars.as_str().len(); let (mut range, c) = chars.next().unwrap();
chars.next(); assert!(c.as_ref().unwrap().is_alphabetic() || *c.as_ref().unwrap() == '_');
while let Some(next_char) = chars.clone().next() { while let Some((r, Ok(next_char))) = chars.peek() {
match next_char { if *next_char == '_' || next_char.is_ascii_digit() || next_char.is_alphabetic() {
'a'..='z' | 'A'..='Z' | '0'..='9' | '_' => { chars.next();
chars.next(); range = range.extend_to(r);
} } else {
_ => { break;
break;
}
} }
} }
let end = initial_len - chars.as_str().len(); callback(range, FormatSpecifier::Identifier);
callback(
TextRange::from_to(TextUnit::from_usize(start), TextUnit::from_usize(end)),
FormatSpecifier::Identifier,
);
} }
} }
} }
impl HasFormatSpecifier for String {} impl HasFormatSpecifier for String {
impl HasFormatSpecifier for RawString {} fn char_ranges(
&self,
) -> Option<Vec<(TextRange, Result<char, rustc_lexer::unescape::EscapeError>)>> {
let text = self.text().as_str();
let text = &text[self.text_range_between_quotes()? - self.syntax().text_range().start()];
let offset = self.text_range_between_quotes()?.start() - self.syntax().text_range().start();
let mut res = Vec::with_capacity(text.len());
rustc_lexer::unescape::unescape_str(text, &mut |range, unescaped_char| {
res.push((
TextRange::from_to(
TextUnit::from_usize(range.start),
TextUnit::from_usize(range.end),
) + offset,
unescaped_char,
))
});
Some(res)
}
}
impl HasFormatSpecifier for RawString {
    /// Lists every character between the quotes of this raw string together
    /// with its text range.
    ///
    /// No unescaping is performed: each character is reported verbatim as
    /// `Ok(c)`. Ranges are expressed relative to the start of the token (the
    /// offset of the opening quote is added to each in-literal index).
    /// Returns `None` when `text_range_between_quotes` yields `None`.
    fn char_ranges(
        &self,
    ) -> Option<Vec<(TextRange, Result<char, rustc_lexer::unescape::EscapeError>)>> {
        let whole = self.text().as_str();
        let token_start = self.syntax().text_range().start();
        // Range of the literal's contents, rebased onto the token's own text.
        let inner_range = self.text_range_between_quotes()? - token_start;
        let shift = inner_range.start();
        let contents = &whole[inner_range];

        let mut pieces = Vec::with_capacity(contents.len());
        pieces.extend(contents.char_indices().map(|(begin, ch)| {
            let local = TextRange::from_to(
                TextUnit::from_usize(begin),
                TextUnit::from_usize(begin + ch.len_utf8()),
            );
            (local + shift, Ok(ch))
        }));
        Some(pieces)
    }
}