mirror of
https://github.com/rust-lang/rust-analyzer
synced 2025-01-13 21:54:42 +00:00
Fix incorrect encoding of literals in the proc-macro-api on version 4
This commit is contained in:
parent
f913901399
commit
05ce57efd5
12 changed files with 183 additions and 134 deletions
3
Cargo.lock
generated
3
Cargo.lock
generated
|
@ -1046,7 +1046,6 @@ dependencies = [
|
|||
"arrayvec",
|
||||
"cov-mark",
|
||||
"parser",
|
||||
"ra-ap-rustc_lexer",
|
||||
"rustc-hash",
|
||||
"smallvec",
|
||||
"span",
|
||||
|
@ -1326,6 +1325,7 @@ dependencies = [
|
|||
"base-db",
|
||||
"indexmap",
|
||||
"la-arena 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"mbe",
|
||||
"paths",
|
||||
"rustc-hash",
|
||||
"serde",
|
||||
|
@ -2218,6 +2218,7 @@ name = "tt"
|
|||
version = "0.0.0"
|
||||
dependencies = [
|
||||
"arrayvec",
|
||||
"ra-ap-rustc_lexer",
|
||||
"smol_str",
|
||||
"stdx",
|
||||
"text-size",
|
||||
|
|
|
@ -5,9 +5,10 @@ use base_db::CrateId;
|
|||
use cfg::CfgExpr;
|
||||
use either::Either;
|
||||
use intern::{sym, Interned};
|
||||
|
||||
use mbe::{
|
||||
desugar_doc_comment_text, syntax_node_to_token_tree, token_to_literal, DelimiterKind,
|
||||
DocCommentDesugarMode, Punct,
|
||||
desugar_doc_comment_text, syntax_node_to_token_tree, DelimiterKind, DocCommentDesugarMode,
|
||||
Punct,
|
||||
};
|
||||
use smallvec::{smallvec, SmallVec};
|
||||
use span::{Span, SyntaxContextId};
|
||||
|
@ -20,7 +21,7 @@ use crate::{
|
|||
db::ExpandDatabase,
|
||||
mod_path::ModPath,
|
||||
span_map::SpanMapRef,
|
||||
tt::{self, Subtree},
|
||||
tt::{self, token_to_literal, Subtree},
|
||||
InFile,
|
||||
};
|
||||
|
||||
|
|
|
@ -59,7 +59,7 @@ pub use span::{HirFileId, MacroCallId, MacroFileId};
|
|||
|
||||
pub mod tt {
|
||||
pub use span::Span;
|
||||
pub use tt::{DelimiterKind, IdentIsRaw, LitKind, Spacing};
|
||||
pub use tt::{token_to_literal, DelimiterKind, IdentIsRaw, LitKind, Spacing};
|
||||
|
||||
pub type Delimiter = ::tt::Delimiter<Span>;
|
||||
pub type DelimSpan = ::tt::DelimSpan<Span>;
|
||||
|
|
|
@ -17,7 +17,6 @@ rustc-hash.workspace = true
|
|||
smallvec.workspace = true
|
||||
tracing.workspace = true
|
||||
arrayvec.workspace = true
|
||||
ra-ap-rustc_lexer.workspace = true
|
||||
|
||||
# local deps
|
||||
syntax.workspace = true
|
||||
|
@ -30,7 +29,7 @@ span.workspace = true
|
|||
test-utils.workspace = true
|
||||
|
||||
[features]
|
||||
in-rust-tree = ["parser/in-rust-tree", "syntax/in-rust-tree"]
|
||||
in-rust-tree = ["parser/in-rust-tree", "tt/in-rust-tree", "syntax/in-rust-tree"]
|
||||
|
||||
[lints]
|
||||
workspace = true
|
||||
|
|
|
@ -6,13 +6,6 @@
|
|||
//! The tests for this functionality live in another crate:
|
||||
//! `hir_def::macro_expansion_tests::mbe`.
|
||||
|
||||
#![cfg_attr(feature = "in-rust-tree", feature(rustc_private))]
|
||||
|
||||
#[cfg(not(feature = "in-rust-tree"))]
|
||||
extern crate ra_ap_rustc_lexer as rustc_lexer;
|
||||
#[cfg(feature = "in-rust-tree")]
|
||||
extern crate rustc_lexer;
|
||||
|
||||
mod expander;
|
||||
mod parser;
|
||||
mod syntax_bridge;
|
||||
|
@ -36,7 +29,7 @@ pub use tt::{Delimiter, DelimiterKind, Punct};
|
|||
pub use crate::syntax_bridge::{
|
||||
desugar_doc_comment_text, parse_exprs_with_sep, parse_to_token_tree,
|
||||
parse_to_token_tree_static_span, syntax_node_to_token_tree, syntax_node_to_token_tree_modified,
|
||||
token_to_literal, token_tree_to_syntax_node, DocCommentDesugarMode, SpanMapper,
|
||||
token_tree_to_syntax_node, DocCommentDesugarMode, SpanMapper,
|
||||
};
|
||||
|
||||
pub use crate::syntax_bridge::dummy_test_span_utils::*;
|
||||
|
|
|
@ -4,7 +4,7 @@ use std::fmt;
|
|||
|
||||
use rustc_hash::{FxHashMap, FxHashSet};
|
||||
use span::{Edition, SpanAnchor, SpanData, SpanMap};
|
||||
use stdx::{format_to, itertools::Itertools, never, non_empty_vec::NonEmptyVec};
|
||||
use stdx::{format_to, never, non_empty_vec::NonEmptyVec};
|
||||
use syntax::{
|
||||
ast::{self, make::tokens::doc_comment},
|
||||
format_smolstr, AstToken, Parse, PreorderWithTokens, SmolStr, SyntaxElement,
|
||||
|
@ -14,6 +14,7 @@ use syntax::{
|
|||
use tt::{
|
||||
buffer::{Cursor, TokenBuffer},
|
||||
iter::TtIter,
|
||||
token_to_literal,
|
||||
};
|
||||
|
||||
use crate::to_parser_input::to_parser_input;
|
||||
|
@ -400,56 +401,6 @@ where
|
|||
}
|
||||
}
|
||||
|
||||
pub fn token_to_literal<S>(text: SmolStr, span: S) -> tt::Literal<S>
|
||||
where
|
||||
S: Copy,
|
||||
{
|
||||
use rustc_lexer::LiteralKind;
|
||||
|
||||
let token = rustc_lexer::tokenize(&text).next_tuple();
|
||||
let Some((rustc_lexer::Token {
|
||||
kind: rustc_lexer::TokenKind::Literal { kind, suffix_start },
|
||||
..
|
||||
},)) = token
|
||||
else {
|
||||
return tt::Literal { span, text, kind: tt::LitKind::Err(()), suffix: None };
|
||||
};
|
||||
|
||||
let (kind, start_offset, end_offset) = match kind {
|
||||
LiteralKind::Int { .. } => (tt::LitKind::Integer, 0, 0),
|
||||
LiteralKind::Float { .. } => (tt::LitKind::Float, 0, 0),
|
||||
LiteralKind::Char { terminated } => (tt::LitKind::Char, 1, terminated as usize),
|
||||
LiteralKind::Byte { terminated } => (tt::LitKind::Byte, 2, terminated as usize),
|
||||
LiteralKind::Str { terminated } => (tt::LitKind::Str, 1, terminated as usize),
|
||||
LiteralKind::ByteStr { terminated } => (tt::LitKind::ByteStr, 2, terminated as usize),
|
||||
LiteralKind::CStr { terminated } => (tt::LitKind::CStr, 2, terminated as usize),
|
||||
LiteralKind::RawStr { n_hashes } => (
|
||||
tt::LitKind::StrRaw(n_hashes.unwrap_or_default()),
|
||||
2 + n_hashes.unwrap_or_default() as usize,
|
||||
1 + n_hashes.unwrap_or_default() as usize,
|
||||
),
|
||||
LiteralKind::RawByteStr { n_hashes } => (
|
||||
tt::LitKind::ByteStrRaw(n_hashes.unwrap_or_default()),
|
||||
3 + n_hashes.unwrap_or_default() as usize,
|
||||
1 + n_hashes.unwrap_or_default() as usize,
|
||||
),
|
||||
LiteralKind::RawCStr { n_hashes } => (
|
||||
tt::LitKind::CStrRaw(n_hashes.unwrap_or_default()),
|
||||
3 + n_hashes.unwrap_or_default() as usize,
|
||||
1 + n_hashes.unwrap_or_default() as usize,
|
||||
),
|
||||
};
|
||||
|
||||
let (lit, suffix) = text.split_at(suffix_start as usize);
|
||||
let lit = &lit[start_offset..lit.len() - end_offset];
|
||||
let suffix = match suffix {
|
||||
"" | "_" => None,
|
||||
suffix => Some(Box::new(suffix.into())),
|
||||
};
|
||||
|
||||
tt::Literal { span, text: lit.into(), kind, suffix }
|
||||
}
|
||||
|
||||
fn is_single_token_op(kind: SyntaxKind) -> bool {
|
||||
matches!(
|
||||
kind,
|
||||
|
|
|
@ -28,6 +28,8 @@ span.workspace = true
|
|||
# InternIds for the syntax context
|
||||
base-db.workspace = true
|
||||
la-arena.workspace = true
|
||||
# only here to parse via token_to_literal
|
||||
mbe.workspace = true
|
||||
|
||||
[lints]
|
||||
workspace = true
|
||||
|
|
|
@ -197,7 +197,7 @@ mod tests {
|
|||
.into(),
|
||||
),
|
||||
TokenTree::Leaf(Leaf::Literal(Literal {
|
||||
text: "\"Foo\"".into(),
|
||||
text: "Foo".into(),
|
||||
span: Span {
|
||||
range: TextRange::at(TextSize::new(10), TextSize::of("\"Foo\"")),
|
||||
anchor,
|
||||
|
@ -263,10 +263,11 @@ mod tests {
|
|||
#[test]
|
||||
fn test_proc_macro_rpc_works() {
|
||||
let tt = fixture_token_tree();
|
||||
for v in RUST_ANALYZER_SPAN_SUPPORT..=CURRENT_API_VERSION {
|
||||
let mut span_data_table = Default::default();
|
||||
let task = ExpandMacro {
|
||||
data: ExpandMacroData {
|
||||
macro_body: FlatTree::new(&tt, CURRENT_API_VERSION, &mut span_data_table),
|
||||
macro_body: FlatTree::new(&tt, v, &mut span_data_table),
|
||||
macro_name: Default::default(),
|
||||
attributes: None,
|
||||
has_global_spans: ExpnGlobals {
|
||||
|
@ -288,7 +289,9 @@ mod tests {
|
|||
|
||||
assert_eq!(
|
||||
tt,
|
||||
back.data.macro_body.to_subtree_resolved(CURRENT_API_VERSION, &span_data_table)
|
||||
back.data.macro_body.to_subtree_resolved(v, &span_data_table),
|
||||
"version: {v}"
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -141,6 +141,7 @@ impl FlatTree {
|
|||
ident: Vec::new(),
|
||||
token_tree: Vec::new(),
|
||||
text: Vec::new(),
|
||||
version,
|
||||
};
|
||||
w.write(subtree);
|
||||
|
||||
|
@ -178,6 +179,7 @@ impl FlatTree {
|
|||
ident: Vec::new(),
|
||||
token_tree: Vec::new(),
|
||||
text: Vec::new(),
|
||||
version,
|
||||
};
|
||||
w.write(subtree);
|
||||
|
||||
|
@ -228,6 +230,7 @@ impl FlatTree {
|
|||
token_tree: self.token_tree,
|
||||
text: self.text,
|
||||
span_data_table,
|
||||
version,
|
||||
}
|
||||
.read()
|
||||
}
|
||||
|
@ -253,6 +256,7 @@ impl FlatTree {
|
|||
token_tree: self.token_tree,
|
||||
text: self.text,
|
||||
span_data_table: &(),
|
||||
version,
|
||||
}
|
||||
.read()
|
||||
}
|
||||
|
@ -386,8 +390,9 @@ impl InternableSpan for Span {
|
|||
|
||||
struct Writer<'a, 'span, S: InternableSpan> {
|
||||
work: VecDeque<(usize, &'a tt::Subtree<S>)>,
|
||||
string_table: FxHashMap<&'a str, u32>,
|
||||
string_table: FxHashMap<std::borrow::Cow<'a, str>, u32>,
|
||||
span_data_table: &'span mut S::Table,
|
||||
version: u32,
|
||||
|
||||
subtree: Vec<SubtreeRepr>,
|
||||
literal: Vec<LiteralRepr>,
|
||||
|
@ -425,9 +430,15 @@ impl<'a, 'span, S: InternableSpan> Writer<'a, 'span, S> {
|
|||
tt::TokenTree::Leaf(leaf) => match leaf {
|
||||
tt::Leaf::Literal(lit) => {
|
||||
let idx = self.literal.len() as u32;
|
||||
let text = self.intern(&lit.text);
|
||||
let id = self.token_id_of(lit.span);
|
||||
let suffix = lit.suffix.as_ref().map(|s| self.intern(s)).unwrap_or(!0);
|
||||
let (text, suffix) = if self.version >= EXTENDED_LEAF_DATA {
|
||||
(
|
||||
self.intern(&lit.text),
|
||||
lit.suffix.as_ref().map(|s| self.intern(s)).unwrap_or(!0),
|
||||
)
|
||||
} else {
|
||||
(self.intern_owned(format!("{lit}")), !0)
|
||||
};
|
||||
self.literal.push(LiteralRepr {
|
||||
id,
|
||||
text,
|
||||
|
@ -456,13 +467,15 @@ impl<'a, 'span, S: InternableSpan> Writer<'a, 'span, S> {
|
|||
}
|
||||
tt::Leaf::Ident(ident) => {
|
||||
let idx = self.ident.len() as u32;
|
||||
let text = self.intern(&ident.text);
|
||||
let id = self.token_id_of(ident.span);
|
||||
self.ident.push(IdentRepr {
|
||||
id,
|
||||
text,
|
||||
is_raw: ident.is_raw == tt::IdentIsRaw::Yes,
|
||||
});
|
||||
let text = if self.version >= EXTENDED_LEAF_DATA {
|
||||
self.intern(&ident.text)
|
||||
} else if ident.is_raw.yes() {
|
||||
self.intern_owned(format!("r#{}", ident.text,))
|
||||
} else {
|
||||
self.intern(&ident.text)
|
||||
};
|
||||
self.ident.push(IdentRepr { id, text, is_raw: ident.is_raw.yes() });
|
||||
idx << 2 | 0b11
|
||||
}
|
||||
},
|
||||
|
@ -484,15 +497,25 @@ impl<'a, 'span, S: InternableSpan> Writer<'a, 'span, S> {
|
|||
|
||||
pub(crate) fn intern(&mut self, text: &'a str) -> u32 {
|
||||
let table = &mut self.text;
|
||||
*self.string_table.entry(text).or_insert_with(|| {
|
||||
*self.string_table.entry(text.into()).or_insert_with(|| {
|
||||
let idx = table.len();
|
||||
table.push(text.to_owned());
|
||||
idx as u32
|
||||
})
|
||||
}
|
||||
|
||||
/// Interns an owned string and returns its index in the `text` table.
///
/// Looks `text` up in `string_table`; when it is not present yet, the string
/// is pushed onto the `text` table and its position is recorded and returned.
/// When it is already present, the previously recorded index is returned.
pub(crate) fn intern_owned(&mut self, text: String) -> u32 {
|
||||
let table = &mut self.text;
|
||||
// The map key is built from a clone so that `text` itself can still be
// moved into the table by the closure below.
*self.string_table.entry(text.clone().into()).or_insert_with(|| {
|
||||
// The new entry's index is the table length before the push.
let idx = table.len();
|
||||
table.push(text);
|
||||
idx as u32
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
struct Reader<'span, S: InternableSpan> {
|
||||
version: u32,
|
||||
subtree: Vec<SubtreeRepr>,
|
||||
literal: Vec<LiteralRepr>,
|
||||
punct: Vec<PunctRepr>,
|
||||
|
@ -528,9 +551,12 @@ impl<'span, S: InternableSpan> Reader<'span, S> {
|
|||
0b01 => {
|
||||
use tt::LitKind::*;
|
||||
let repr = &self.literal[idx];
|
||||
tt::Leaf::Literal(tt::Literal {
|
||||
text: self.text[repr.text as usize].as_str().into(),
|
||||
span: read_span(repr.id),
|
||||
let text = self.text[repr.text as usize].as_str();
|
||||
let span = read_span(repr.id);
|
||||
tt::Leaf::Literal(if self.version >= EXTENDED_LEAF_DATA {
|
||||
tt::Literal {
|
||||
text: text.into(),
|
||||
span,
|
||||
kind: match u16::to_le_bytes(repr.kind) {
|
||||
[0, _] => Err(()),
|
||||
[1, _] => Byte,
|
||||
|
@ -552,6 +578,9 @@ impl<'span, S: InternableSpan> Reader<'span, S> {
|
|||
} else {
|
||||
None
|
||||
},
|
||||
}
|
||||
} else {
|
||||
tt::token_to_literal(text.into(), span)
|
||||
})
|
||||
.into()
|
||||
}
|
||||
|
@ -566,14 +595,23 @@ impl<'span, S: InternableSpan> Reader<'span, S> {
|
|||
}
|
||||
0b11 => {
|
||||
let repr = &self.ident[idx];
|
||||
tt::Leaf::Ident(tt::Ident {
|
||||
text: self.text[repr.text as usize].as_str().into(),
|
||||
span: read_span(repr.id),
|
||||
is_raw: if repr.is_raw {
|
||||
let text = self.text[repr.text as usize].as_str();
|
||||
let (is_raw, text) = if self.version >= EXTENDED_LEAF_DATA {
|
||||
(
|
||||
if repr.is_raw {
|
||||
tt::IdentIsRaw::Yes
|
||||
} else {
|
||||
tt::IdentIsRaw::No
|
||||
},
|
||||
text,
|
||||
)
|
||||
} else {
|
||||
tt::IdentIsRaw::split_from_symbol(text)
|
||||
};
|
||||
tt::Leaf::Ident(tt::Ident {
|
||||
text: text.into(),
|
||||
span: read_span(repr.id),
|
||||
is_raw,
|
||||
})
|
||||
.into()
|
||||
}
|
||||
|
|
|
@ -34,7 +34,7 @@ proc-macro-test.path = "./proc-macro-test"
|
|||
|
||||
[features]
|
||||
sysroot-abi = []
|
||||
in-rust-tree = ["mbe/in-rust-tree", "sysroot-abi"]
|
||||
in-rust-tree = ["mbe/in-rust-tree", "tt/in-rust-tree","sysroot-abi"]
|
||||
|
||||
[lints]
|
||||
workspace = true
|
||||
|
|
|
@ -17,6 +17,10 @@ smol_str.workspace = true
|
|||
text-size.workspace = true
|
||||
|
||||
stdx.workspace = true
|
||||
ra-ap-rustc_lexer.workspace = true
|
||||
|
||||
[features]
|
||||
in-rust-tree = []
|
||||
|
||||
[lints]
|
||||
workspace = true
|
||||
|
|
|
@ -2,14 +2,21 @@
|
|||
//! input and output) of macros. It closely mirrors `proc_macro` crate's
|
||||
//! `TokenTree`.
|
||||
|
||||
#![cfg_attr(feature = "in-rust-tree", feature(rustc_private))]
|
||||
|
||||
#[cfg(not(feature = "in-rust-tree"))]
|
||||
extern crate ra_ap_rustc_lexer as rustc_lexer;
|
||||
#[cfg(feature = "in-rust-tree")]
|
||||
extern crate rustc_lexer;
|
||||
|
||||
pub mod buffer;
|
||||
pub mod iter;
|
||||
|
||||
use std::fmt;
|
||||
|
||||
use stdx::impl_from;
|
||||
use stdx::{impl_from, itertools::Itertools as _};
|
||||
|
||||
pub use smol_str::SmolStr;
|
||||
pub use smol_str::{format_smolstr, SmolStr};
|
||||
pub use text_size::{TextRange, TextSize};
|
||||
|
||||
#[derive(Clone, PartialEq, Debug)]
|
||||
|
@ -196,6 +203,56 @@ pub struct Literal<S> {
|
|||
pub suffix: Option<Box<SmolStr>>,
|
||||
}
|
||||
|
||||
/// Builds a [`Literal`] from the source text of a single literal token.
///
/// `text` is lexed with `rustc_lexer` and only the first token is inspected.
/// If that token is not a literal, the result has kind `LitKind::Err(())`,
/// keeps `text` unchanged and has no suffix. Otherwise the literal's kind is
/// mapped to a [`LitKind`], the leading and trailing delimiter bytes are cut
/// off the stored text, and a non-trivial suffix is stored separately in
/// `suffix`. `span` is copied into the result as-is.
pub fn token_to_literal<S>(text: SmolStr, span: S) -> Literal<S>
|
||||
where
|
||||
S: Copy,
|
||||
{
|
||||
use rustc_lexer::LiteralKind;
|
||||
|
||||
// Only the first token produced by the lexer is looked at.
let token = rustc_lexer::tokenize(&text).next_tuple();
|
||||
// Anything that does not start with a literal token becomes an `Err`
// literal carrying the original, unmodified text.
let Some((rustc_lexer::Token {
|
||||
kind: rustc_lexer::TokenKind::Literal { kind, suffix_start },
|
||||
..
|
||||
|
||||
},)) = token
|
||||
else {
|
||||
return Literal { span, text, kind: LitKind::Err(()), suffix: None };
|
||||
};
|
||||
|
||||
// For each lexer kind: the `LitKind` to report, the number of bytes to drop
// from the front (prefix letters, hashes, opening quote) and the number of
// bytes to drop from the back (closing quote, hashes).
let (kind, start_offset, end_offset) = match kind {
|
||||
LiteralKind::Int { .. } => (LitKind::Integer, 0, 0),
|
||||
LiteralKind::Float { .. } => (LitKind::Float, 0, 0),
|
||||
// `terminated as usize` is 1 only when the lexer saw the closing quote,
// so nothing is cut from the end of an unterminated literal.
LiteralKind::Char { terminated } => (LitKind::Char, 1, terminated as usize),
|
||||
LiteralKind::Byte { terminated } => (LitKind::Byte, 2, terminated as usize),
|
||||
LiteralKind::Str { terminated } => (LitKind::Str, 1, terminated as usize),
|
||||
LiteralKind::ByteStr { terminated } => (LitKind::ByteStr, 2, terminated as usize),
|
||||
LiteralKind::CStr { terminated } => (LitKind::CStr, 2, terminated as usize),
|
||||
// Raw literals: a missing `n_hashes` is treated as zero hashes.
LiteralKind::RawStr { n_hashes } => (
|
||||
LitKind::StrRaw(n_hashes.unwrap_or_default()),
|
||||
2 + n_hashes.unwrap_or_default() as usize,
|
||||
1 + n_hashes.unwrap_or_default() as usize,
|
||||
),
|
||||
LiteralKind::RawByteStr { n_hashes } => (
|
||||
LitKind::ByteStrRaw(n_hashes.unwrap_or_default()),
|
||||
3 + n_hashes.unwrap_or_default() as usize,
|
||||
1 + n_hashes.unwrap_or_default() as usize,
|
||||
),
|
||||
LiteralKind::RawCStr { n_hashes } => (
|
||||
LitKind::CStrRaw(n_hashes.unwrap_or_default()),
|
||||
3 + n_hashes.unwrap_or_default() as usize,
|
||||
1 + n_hashes.unwrap_or_default() as usize,
|
||||
),
|
||||
};
|
||||
|
||||
// `suffix_start` is the lexer-reported offset at which the suffix begins.
let (lit, suffix) = text.split_at(suffix_start as usize);
|
||||
// NOTE(review): the raw-literal arms always cut one closing-quote byte, even
// when `n_hashes` is `None`; for a very short lexeme (e.g. an unterminated
// `r"`) `start_offset` may exceed `lit.len() - end_offset` and this slice
// would panic — confirm such input cannot reach this function.
let lit = &lit[start_offset..lit.len() - end_offset];
|
||||
// An empty suffix or a lone `_` counts as "no suffix".
let suffix = match suffix {
|
||||
"" | "_" => None,
|
||||
suffix => Some(Box::new(suffix.into())),
|
||||
};
|
||||
|
||||
Literal { span, text: lit.into(), kind, suffix }
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
|
||||
pub struct Punct<S> {
|
||||
pub char: char,
|
||||
|
|
Loading…
Reference in a new issue