11294: internal: Move format specifier lexing from syntax to ide_db r=Veykril a=Veykril

bors r+

Co-authored-by: Lukas Wirth <lukastw97@gmail.com>
This commit is contained in:
bors[bot] 2022-01-15 12:18:46 +00:00 committed by GitHub
commit 7a52f83700
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 301 additions and 315 deletions

View file

@ -16,7 +16,7 @@ use hir::{InFile, Name, Semantics};
use ide_db::RootDatabase;
use rustc_hash::FxHashMap;
use syntax::{
ast::{self, HasFormatSpecifier},
ast::{self, IsString},
AstNode, AstToken, NodeOrToken,
SyntaxKind::*,
SyntaxNode, TextRange, WalkEvent, T,
@ -336,8 +336,11 @@ fn traverse(
}
highlight_format_string(hl, &string, &expanded_string, range);
// Highlight escape sequences
if let Some(char_ranges) = string.char_ranges() {
for (piece_range, _) in char_ranges.iter().filter(|(_, char)| char.is_ok()) {
string.escaped_char_ranges(&mut |piece_range, char| {
if char.is_err() {
return;
}
if string.text()[piece_range.start().into()..].starts_with('\\') {
hl.add(HlRange {
range: piece_range + range.start(),
@ -345,8 +348,7 @@ fn traverse(
binding_hash: None,
});
}
}
}
});
}
}

View file

@ -1,9 +1,9 @@
//! Syntax highlighting for format macro strings.
use ide_db::{helpers::format_string::is_format_string, SymbolKind};
use syntax::{
ast::{self, FormatSpecifier, HasFormatSpecifier},
TextRange,
use ide_db::{
helpers::format_string::{is_format_string, lex_format_specifiers, FormatSpecifier},
SymbolKind,
};
use syntax::{ast, TextRange};
use crate::{syntax_highlighting::highlights::Highlights, HlRange, HlTag};
@ -17,7 +17,7 @@ pub(super) fn highlight_format_string(
return;
}
string.lex_format_specifier(|piece_range, kind| {
lex_format_specifiers(string, &mut |piece_range, kind| {
if let Some(highlight) = highlight_format_specifier(kind) {
stack.add(HlRange {
range: piece_range + range.start(),

View file

@ -1,5 +1,8 @@
//! Tools to work with format string literals for the `format_args!` family of macros.
use syntax::{ast, AstNode, AstToken};
use syntax::{
ast::{self, IsString},
AstNode, AstToken, TextRange,
};
pub fn is_format_string(string: &ast::String) -> bool {
// Check if `string` is a format string argument of a macro invocation.
@ -10,7 +13,7 @@ pub fn is_format_string(string: &ast::String) -> bool {
//
// This setup lets us correctly highlight the components of `concat!("{}", "bla")` format
// strings. It still fails for `concat!("{", "}")`, but that is rare.
format!("{string} {bar}", bar = string);
(|| {
let macro_call = string.syntax().ancestors().find_map(ast::MacroCall::cast)?;
let name = macro_call.path()?.segment()?.name_ref()?;
@ -29,3 +32,261 @@ pub fn is_format_string(string: &ast::String) -> bool {
})()
.is_some()
}
/// One lexical piece of a format specifier, as reported to the callback of
/// [`lex_format_specifiers`].
///
/// The enum is fieldless, so it is cheap to copy and compare; the extra derives let
/// consumers store, copy and `==`-test the kinds they receive.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FormatSpecifier {
    /// The `{` that starts a specifier.
    Open,
    /// The `}` that ends a specifier.
    Close,
    /// A run of ASCII digits (argument position, width or precision).
    Integer,
    /// A run of `_`, alphabetic characters and ASCII digits that starts with `_` or an
    /// alphabetic character (argument name, width/precision name or type).
    Identifier,
    /// The `:` that follows the optional argument.
    Colon,
    /// The character directly in front of an alignment character.
    Fill,
    /// One of `<`, `^` or `>`.
    Align,
    /// `+` or `-`.
    Sign,
    /// `#`.
    NumberSign,
    /// A `0` flag, i.e. a `0` that is not directly followed by `$`.
    Zero,
    /// The `$` that follows an integer or identifier used as width or precision.
    DollarSign,
    /// The `.` that introduces the precision.
    Dot,
    /// A `*` given as precision.
    Asterisk,
    /// `?`.
    QuestionMark,
}
/// Lexes the format specifiers (`{...}`) contained in `string` and reports every piece that
/// is recognized to `callback`, together with the range of that piece.
///
/// The ranges are the ones produced by `string.escaped_char_ranges`; multi-character pieces
/// (integers, identifiers) are reported with the range covering all of their characters.
/// Characters whose unescaping failed are dropped before lexing. `{{` is treated as an escaped
/// brace and produces no callback. Malformed specifiers are not reported as errors: the lexer
/// emits what it recognized so far and moves on.
pub fn lex_format_specifiers(
string: &ast::String,
mut callback: &mut dyn FnMut(TextRange, FormatSpecifier),
) {
// Buffer every (range, unescape result) pair; the iterator built over this buffer is
// peeked and cloned below to look ahead without consuming.
let mut char_ranges = Vec::new();
string.escaped_char_ranges(&mut |range, res| char_ranges.push((range, res)));
// Only successfully unescaped characters take part in lexing.
let mut chars = char_ranges
.iter()
.filter_map(|(range, res)| Some((*range, *res.as_ref().ok()?)))
.peekable();
while let Some((range, first_char)) = chars.next() {
if let '{' = first_char {
// Format specifier, see syntax at https://doc.rust-lang.org/std/fmt/index.html#syntax
if let Some((_, '{')) = chars.peek() {
// Escaped format specifier, `{{`
chars.next();
continue;
}
callback(range, FormatSpecifier::Open);
// check for integer/identifier
// (`unwrap_or_default` yields the default `(TextRange, char)` at end of input, which
// matches none of the specific arms below)
let (_, int_char) = chars.peek().copied().unwrap_or_default();
match int_char {
// integer
'0'..='9' => read_integer(&mut chars, &mut callback),
// identifier
c if c == '_' || c.is_alphabetic() => read_identifier(&mut chars, &mut callback),
_ => {}
}
if let Some((_, ':')) = chars.peek() {
skip_char_and_emit(&mut chars, FormatSpecifier::Colon, &mut callback);
// check for fill/align
// Look at the next two characters on a clone so nothing is consumed yet.
let mut cloned = chars.clone().take(2);
let (_, first) = cloned.next().unwrap_or_default();
let (_, second) = cloned.next().unwrap_or_default();
match second {
'<' | '^' | '>' => {
// alignment specifier, the first char is the fill character
skip_char_and_emit(&mut chars, FormatSpecifier::Fill, &mut callback);
skip_char_and_emit(&mut chars, FormatSpecifier::Align, &mut callback);
}
_ => {
// no fill character; the first char may still be a bare alignment
if let '<' | '^' | '>' = first {
skip_char_and_emit(&mut chars, FormatSpecifier::Align, &mut callback);
}
}
}
// check for sign
match chars.peek().copied().unwrap_or_default().1 {
'+' | '-' => {
skip_char_and_emit(&mut chars, FormatSpecifier::Sign, &mut callback);
}
_ => {}
}
// check for `#`
if let Some((_, '#')) = chars.peek() {
skip_char_and_emit(&mut chars, FormatSpecifier::NumberSign, &mut callback);
}
// check for `0`
// A `0` directly followed by `$` is left alone here so that it is read as an integer
// width (`0$`) by the width step below.
let mut cloned = chars.clone().take(2);
let first = cloned.next().map(|next| next.1);
let second = cloned.next().map(|next| next.1);
if first == Some('0') && second != Some('$') {
skip_char_and_emit(&mut chars, FormatSpecifier::Zero, &mut callback);
}
// width
match chars.peek().copied().unwrap_or_default().1 {
'0'..='9' => {
read_integer(&mut chars, &mut callback);
if let Some((_, '$')) = chars.peek() {
skip_char_and_emit(
&mut chars,
FormatSpecifier::DollarSign,
&mut callback,
);
}
}
c if c == '_' || c.is_alphabetic() => {
read_identifier(&mut chars, &mut callback);
if chars.peek().map(|&(_, c)| c) == Some('?') {
skip_char_and_emit(
&mut chars,
FormatSpecifier::QuestionMark,
&mut callback,
);
}
// can be either width (indicated by dollar sign, or type in which case
// the next sign has to be `}`)
let next = chars.peek().map(|&(_, c)| c);
match next {
Some('$') => skip_char_and_emit(
&mut chars,
FormatSpecifier::DollarSign,
&mut callback,
),
Some('}') => {
// the identifier was the type; the specifier is complete
skip_char_and_emit(
&mut chars,
FormatSpecifier::Close,
&mut callback,
);
continue;
}
// anything else: give up on this specifier and resume scanning
_ => continue,
};
}
_ => {}
}
// precision
if let Some((_, '.')) = chars.peek() {
skip_char_and_emit(&mut chars, FormatSpecifier::Dot, &mut callback);
match chars.peek().copied().unwrap_or_default().1 {
'*' => {
skip_char_and_emit(
&mut chars,
FormatSpecifier::Asterisk,
&mut callback,
);
}
'0'..='9' => {
read_integer(&mut chars, &mut callback);
if let Some((_, '$')) = chars.peek() {
skip_char_and_emit(
&mut chars,
FormatSpecifier::DollarSign,
&mut callback,
);
}
}
c if c == '_' || c.is_alphabetic() => {
read_identifier(&mut chars, &mut callback);
// an identifier used as precision must be followed by `$`; otherwise give
// up on this specifier
if chars.peek().map(|&(_, c)| c) != Some('$') {
continue;
}
skip_char_and_emit(
&mut chars,
FormatSpecifier::DollarSign,
&mut callback,
);
}
_ => {
// `.` followed by nothing usable: give up on this specifier
continue;
}
}
}
// type
match chars.peek().copied().unwrap_or_default().1 {
'?' => {
skip_char_and_emit(
&mut chars,
FormatSpecifier::QuestionMark,
&mut callback,
);
}
c if c == '_' || c.is_alphabetic() => {
read_identifier(&mut chars, &mut callback);
if chars.peek().map(|&(_, c)| c) == Some('?') {
skip_char_and_emit(
&mut chars,
FormatSpecifier::QuestionMark,
&mut callback,
);
}
}
_ => {}
}
}
// closing brace, if it is the next character
if let Some((_, '}')) = chars.peek() {
skip_char_and_emit(&mut chars, FormatSpecifier::Close, &mut callback);
}
continue;
}
}
// Consumes the next character and reports its range as `emit`.
// Panics (via `unwrap`) if `chars` is exhausted, so callers peek before calling.
fn skip_char_and_emit<I, F>(
chars: &mut std::iter::Peekable<I>,
emit: FormatSpecifier,
callback: &mut F,
) where
I: Iterator<Item = (TextRange, char)>,
F: FnMut(TextRange, FormatSpecifier),
{
let (range, _) = chars.next().unwrap();
callback(range, emit);
}
// Consumes a run of ASCII digits and reports the covering range as `Integer`.
// The next character must exist and be an ASCII digit (`unwrap` + `assert!`).
fn read_integer<I, F>(chars: &mut std::iter::Peekable<I>, callback: &mut F)
where
I: Iterator<Item = (TextRange, char)>,
F: FnMut(TextRange, FormatSpecifier),
{
let (mut range, c) = chars.next().unwrap();
assert!(c.is_ascii_digit());
while let Some(&(r, next_char)) = chars.peek() {
if next_char.is_ascii_digit() {
chars.next();
range = range.cover(r);
} else {
break;
}
}
callback(range, FormatSpecifier::Integer);
}
// Consumes a run of `_`, ASCII digits and alphabetic characters and reports the covering
// range as `Identifier`. The next character must exist and be `_` or alphabetic
// (`unwrap` + `assert!`).
fn read_identifier<I, F>(chars: &mut std::iter::Peekable<I>, callback: &mut F)
where
I: Iterator<Item = (TextRange, char)>,
F: FnMut(TextRange, FormatSpecifier),
{
let (mut range, c) = chars.next().unwrap();
assert!(c.is_alphabetic() || c == '_');
while let Some(&(r, next_char)) = chars.peek() {
if next_char == '_' || next_char.is_ascii_digit() || next_char.is_alphabetic() {
chars.next();
range = range.cover(r);
} else {
break;
}
}
callback(range, FormatSpecifier::Identifier);
}
}

View file

@ -25,10 +25,7 @@ pub use self::{
SlicePatComponents, StructKind, TypeBoundKind, VisibilityKind,
},
operators::{ArithOp, BinaryOp, CmpOp, LogicOp, Ordering, RangeOp, UnaryOp},
token_ext::{
CommentKind, CommentPlacement, CommentShape, FormatSpecifier, HasFormatSpecifier, IsString,
QuoteOffsets, Radix,
},
token_ext::{CommentKind, CommentPlacement, CommentShape, IsString, QuoteOffsets, Radix},
traits::{
DocCommentIter, HasArgList, HasAttrs, HasDocComments, HasGenericParams, HasLoopBody,
HasModuleItem, HasName, HasTypeBounds, HasVisibility,

View file

@ -164,6 +164,25 @@ pub trait IsString: AstToken {
/// Returns the second entry of the token's quote ranges, or `None` when
/// `quote_offsets()` yields nothing.
fn close_quote_text_range(&self) -> Option<TextRange> {
    let offsets = self.quote_offsets()?;
    Some(offsets.quotes.1)
}
/// Unescapes the text between the quotes and calls `cb` once per piece reported by
/// `unescape_literal`, passing the piece's range and the unescape result.
///
/// The reported ranges are shifted by the distance between the start of the token and the
/// start of the text between the quotes. Does nothing when `text_range_between_quotes()`
/// returns `None`.
fn escaped_char_ranges(
    &self,
    cb: &mut dyn FnMut(TextRange, Result<char, rustc_lexer::unescape::EscapeError>),
) {
    if let Some(inner_range) = self.text_range_between_quotes() {
        let token_start = self.syntax().text_range().start();
        // Slice out the literal's contents, addressed relative to the token start.
        let contents = &self.text()[inner_range - token_start];
        let offset = inner_range.start() - token_start;

        unescape_literal(contents, Mode::Str, &mut |span, unescaped| {
            // `span` indexes into `contents`; the conversions panic if they fail.
            let piece =
                TextRange::new(span.start.try_into().unwrap(), span.end.try_into().unwrap());
            cb(piece + offset, unescaped);
        });
    }
}
}
// Empty impl: `ast::String` overrides none of `IsString`'s provided methods.
impl IsString for ast::String {}
@ -257,299 +276,6 @@ impl ast::ByteString {
}
}
/// One lexical piece of a format specifier, as reported to the callback of
/// `HasFormatSpecifier::lex_format_specifier`.
#[derive(Debug)]
pub enum FormatSpecifier {
/// The `{` that starts a specifier.
Open,
/// The `}` that ends a specifier.
Close,
/// A run of ASCII digits.
Integer,
/// A run of `_`, alphabetic characters and ASCII digits that starts with `_` or an
/// alphabetic character.
Identifier,
/// The `:` that follows the optional argument.
Colon,
/// The character directly in front of an alignment character.
Fill,
/// One of `<`, `^` or `>`.
Align,
/// `+` or `-`.
Sign,
/// `#`.
NumberSign,
/// A `0` flag, i.e. a `0` that is not directly followed by `$`.
Zero,
/// The `$` that follows an integer or identifier used as width or precision.
DollarSign,
/// The `.` that introduces the precision.
Dot,
/// A `*` given as precision.
Asterisk,
/// `?`.
QuestionMark,
}
/// Tokens whose text can be lexed for format specifiers (`{...}`).
///
/// Implementors supply `char_ranges`; `lex_format_specifier` is provided on top of it.
pub trait HasFormatSpecifier: AstToken {
/// Returns the unescaped characters of the token together with their ranges, or `None`
/// when they cannot be determined. Each entry carries the unescape result, so failed
/// escapes are included as `Err`.
fn char_ranges(
&self,
) -> Option<Vec<(TextRange, Result<char, rustc_lexer::unescape::EscapeError>)>>;
/// Lexes the format specifiers contained in this token and reports every recognized piece
/// to `callback`, together with its range (as given by `char_ranges`; multi-character
/// pieces get the covering range).
///
/// Does nothing when `char_ranges` returns `None`. Characters whose unescaping failed are
/// dropped before lexing. `{{` is treated as an escaped brace and produces no callback.
/// Malformed specifiers are not reported as errors: the lexer emits what it recognized so
/// far and moves on.
fn lex_format_specifier<F>(&self, mut callback: F)
where
F: FnMut(TextRange, FormatSpecifier),
{
let char_ranges = match self.char_ranges() {
Some(char_ranges) => char_ranges,
None => return,
};
// Only successfully unescaped characters take part in lexing.
let mut chars = char_ranges
.iter()
.filter_map(|(range, res)| Some((*range, *res.as_ref().ok()?)))
.peekable();
while let Some((range, first_char)) = chars.next() {
if let '{' = first_char {
// Format specifier, see syntax at https://doc.rust-lang.org/std/fmt/index.html#syntax
if let Some((_, '{')) = chars.peek() {
// Escaped format specifier, `{{`
chars.next();
continue;
}
callback(range, FormatSpecifier::Open);
// check for integer/identifier
// (`unwrap_or_default` yields the default `(TextRange, char)` at end of input, which
// matches none of the specific arms below)
let (_, int_char) = chars.peek().copied().unwrap_or_default();
match int_char {
// integer
'0'..='9' => read_integer(&mut chars, &mut callback),
// identifier
c if c == '_' || c.is_alphabetic() => {
read_identifier(&mut chars, &mut callback)
}
_ => {}
}
if let Some((_, ':')) = chars.peek() {
skip_char_and_emit(&mut chars, FormatSpecifier::Colon, &mut callback);
// check for fill/align
// Look at the next two characters on a clone so nothing is consumed yet.
let mut cloned = chars.clone().take(2);
let (_, first) = cloned.next().unwrap_or_default();
let (_, second) = cloned.next().unwrap_or_default();
match second {
'<' | '^' | '>' => {
// alignment specifier, the first char is the fill character
skip_char_and_emit(&mut chars, FormatSpecifier::Fill, &mut callback);
skip_char_and_emit(&mut chars, FormatSpecifier::Align, &mut callback);
}
_ => {
// no fill character; the first char may still be a bare alignment
if let '<' | '^' | '>' = first {
skip_char_and_emit(
&mut chars,
FormatSpecifier::Align,
&mut callback,
);
}
}
}
// check for sign
match chars.peek().copied().unwrap_or_default().1 {
'+' | '-' => {
skip_char_and_emit(&mut chars, FormatSpecifier::Sign, &mut callback);
}
_ => {}
}
// check for `#`
if let Some((_, '#')) = chars.peek() {
skip_char_and_emit(&mut chars, FormatSpecifier::NumberSign, &mut callback);
}
// check for `0`
// A `0` directly followed by `$` is left alone here so that it is read as an integer
// width (`0$`) by the width step below.
let mut cloned = chars.clone().take(2);
let first = cloned.next().map(|next| next.1);
let second = cloned.next().map(|next| next.1);
if first == Some('0') && second != Some('$') {
skip_char_and_emit(&mut chars, FormatSpecifier::Zero, &mut callback);
}
// width
match chars.peek().copied().unwrap_or_default().1 {
'0'..='9' => {
read_integer(&mut chars, &mut callback);
if let Some((_, '$')) = chars.peek() {
skip_char_and_emit(
&mut chars,
FormatSpecifier::DollarSign,
&mut callback,
);
}
}
c if c == '_' || c.is_alphabetic() => {
read_identifier(&mut chars, &mut callback);
if chars.peek().map(|&(_, c)| c) == Some('?') {
skip_char_and_emit(
&mut chars,
FormatSpecifier::QuestionMark,
&mut callback,
);
}
// can be either width (indicated by dollar sign, or type in which case
// the next sign has to be `}`)
let next = chars.peek().map(|&(_, c)| c);
match next {
Some('$') => skip_char_and_emit(
&mut chars,
FormatSpecifier::DollarSign,
&mut callback,
),
Some('}') => {
// the identifier was the type; the specifier is complete
skip_char_and_emit(
&mut chars,
FormatSpecifier::Close,
&mut callback,
);
continue;
}
// anything else: give up on this specifier and resume scanning
_ => continue,
};
}
_ => {}
}
// precision
if let Some((_, '.')) = chars.peek() {
skip_char_and_emit(&mut chars, FormatSpecifier::Dot, &mut callback);
match chars.peek().copied().unwrap_or_default().1 {
'*' => {
skip_char_and_emit(
&mut chars,
FormatSpecifier::Asterisk,
&mut callback,
);
}
'0'..='9' => {
read_integer(&mut chars, &mut callback);
if let Some((_, '$')) = chars.peek() {
skip_char_and_emit(
&mut chars,
FormatSpecifier::DollarSign,
&mut callback,
);
}
}
c if c == '_' || c.is_alphabetic() => {
read_identifier(&mut chars, &mut callback);
// an identifier used as precision must be followed by `$`; otherwise give
// up on this specifier
if chars.peek().map(|&(_, c)| c) != Some('$') {
continue;
}
skip_char_and_emit(
&mut chars,
FormatSpecifier::DollarSign,
&mut callback,
);
}
_ => {
// `.` followed by nothing usable: give up on this specifier
continue;
}
}
}
// type
match chars.peek().copied().unwrap_or_default().1 {
'?' => {
skip_char_and_emit(
&mut chars,
FormatSpecifier::QuestionMark,
&mut callback,
);
}
c if c == '_' || c.is_alphabetic() => {
read_identifier(&mut chars, &mut callback);
if chars.peek().map(|&(_, c)| c) == Some('?') {
skip_char_and_emit(
&mut chars,
FormatSpecifier::QuestionMark,
&mut callback,
);
}
}
_ => {}
}
}
// closing brace, if it is the next character
if let Some((_, '}')) = chars.peek() {
skip_char_and_emit(&mut chars, FormatSpecifier::Close, &mut callback);
}
continue;
}
}
// Consumes the next character and reports its range as `emit`.
// Panics (via `unwrap`) if `chars` is exhausted, so callers peek before calling.
fn skip_char_and_emit<I, F>(
chars: &mut std::iter::Peekable<I>,
emit: FormatSpecifier,
callback: &mut F,
) where
I: Iterator<Item = (TextRange, char)>,
F: FnMut(TextRange, FormatSpecifier),
{
let (range, _) = chars.next().unwrap();
callback(range, emit);
}
// Consumes a run of ASCII digits and reports the covering range as `Integer`.
// The next character must exist and be an ASCII digit (`unwrap` + `assert!`).
fn read_integer<I, F>(chars: &mut std::iter::Peekable<I>, callback: &mut F)
where
I: Iterator<Item = (TextRange, char)>,
F: FnMut(TextRange, FormatSpecifier),
{
let (mut range, c) = chars.next().unwrap();
assert!(c.is_ascii_digit());
while let Some(&(r, next_char)) = chars.peek() {
if next_char.is_ascii_digit() {
chars.next();
range = range.cover(r);
} else {
break;
}
}
callback(range, FormatSpecifier::Integer);
}
// Consumes a run of `_`, ASCII digits and alphabetic characters and reports the covering
// range as `Identifier`. The next character must exist and be `_` or alphabetic
// (`unwrap` + `assert!`).
fn read_identifier<I, F>(chars: &mut std::iter::Peekable<I>, callback: &mut F)
where
I: Iterator<Item = (TextRange, char)>,
F: FnMut(TextRange, FormatSpecifier),
{
let (mut range, c) = chars.next().unwrap();
assert!(c.is_alphabetic() || c == '_');
while let Some(&(r, next_char)) = chars.peek() {
if next_char == '_' || next_char.is_ascii_digit() || next_char.is_alphabetic() {
chars.next();
range = range.cover(r);
} else {
break;
}
}
callback(range, FormatSpecifier::Identifier);
}
}
}
impl HasFormatSpecifier for ast::String {
    /// Unescapes the text between the quotes and collects every piece reported by
    /// `unescape_literal` as `(range, unescape result)`.
    ///
    /// The ranges are shifted by the distance between the start of the token and the start
    /// of the text between the quotes. Returns `None` when `text_range_between_quotes()`
    /// does.
    fn char_ranges(
        &self,
    ) -> Option<Vec<(TextRange, Result<char, rustc_lexer::unescape::EscapeError>)>> {
        let token_start = self.syntax().text_range().start();
        let inner_range = self.text_range_between_quotes()?;
        // Slice out the literal's contents, addressed relative to the token start.
        let contents = &self.text()[inner_range - token_start];
        let offset = inner_range.start() - token_start;

        let mut pieces = Vec::with_capacity(contents.len());
        unescape_literal(contents, Mode::Str, &mut |span, unescaped| {
            // `span` indexes into `contents`; the conversions panic if they fail.
            let piece =
                TextRange::new(span.start.try_into().unwrap(), span.end.try_into().unwrap());
            pieces.push((piece + offset, unescaped));
        });
        Some(pieces)
    }
}
impl ast::IntNumber {
pub fn radix(&self) -> Radix {
match self.text().get(..2).unwrap_or_default() {