diff --git a/Cargo.lock b/Cargo.lock index b165697724..e43f712a6e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1046,6 +1046,7 @@ dependencies = [ "arrayvec", "cov-mark", "parser", + "ra-ap-rustc_lexer", "rustc-hash", "smallvec", "span", diff --git a/crates/hir-def/src/attr.rs b/crates/hir-def/src/attr.rs index aacfb07319..5bd60c58e9 100644 --- a/crates/hir-def/src/attr.rs +++ b/crates/hir-def/src/attr.rs @@ -605,7 +605,7 @@ impl<'attr> AttrQuery<'attr> { .nth(2); match name { - Some(tt::TokenTree::Leaf(tt::Leaf::Literal(tt::Literal{ ref text, ..}))) => Some(text), + Some(tt::TokenTree::Leaf(tt::Leaf::Literal(tt::Literal{ text, kind: tt::LitKind::Str | tt::LitKind::StrRaw(_) , ..}))) => Some(text), _ => None } }) diff --git a/crates/hir-def/src/hir/format_args.rs b/crates/hir-def/src/hir/format_args.rs index d6dd5df373..cf176e86db 100644 --- a/crates/hir-def/src/hir/format_args.rs +++ b/crates/hir-def/src/hir/format_args.rs @@ -250,7 +250,7 @@ pub(crate) fn parse( } } ArgRef::Name(name, span) => { - let name = Name::new(name, call_ctx); + let name = Name::new(name, tt::IdentIsRaw::No, call_ctx); if let Some((index, _)) = args.by_name(&name) { record_usage(name, span); // Name found in `args`, so we resolve it to its index. diff --git a/crates/hir-def/src/macro_expansion_tests/builtin_fn_macro.rs b/crates/hir-def/src/macro_expansion_tests/builtin_fn_macro.rs index 9596100b60..ab8bb8bd4c 100644 --- a/crates/hir-def/src/macro_expansion_tests/builtin_fn_macro.rs +++ b/crates/hir-def/src/macro_expansion_tests/builtin_fn_macro.rs @@ -460,13 +460,13 @@ fn test_concat_expand() { #[rustc_builtin_macro] macro_rules! concat {} -fn main() { concat!("fo", "o", 0, r#"bar"#, "\n", false, '"', '\0'); } +fn main() { concat!("fo", "o", 0, r#""bar""#, "\n", false, '"', '\0'); } "##, expect![[r##" #[rustc_builtin_macro] macro_rules! concat {} -fn main() { "foo0bar\nfalse\"\u{0}"; } +fn main() { "foo0\"bar\"\nfalse\"\u{0}"; } "##]], ); } @@ -478,13 +478,13 @@ fn test_concat_bytes_expand() { #[rustc_builtin_macro] macro_rules! concat_bytes {} -fn main() { concat_bytes!(b'A', b"BC", [68, b'E', 70]); } +fn main() { concat_bytes!(b'A', b"BC\"", [68, b'E', 70], br#"G""#,b'\0'); } "##, expect![[r#" #[rustc_builtin_macro] macro_rules! concat_bytes {} -fn main() { [b'A', 66, 67, 68, b'E', 70]; } +fn main() { b"ABC\"DEFG\"\x00"; } "#]], ); } diff --git a/crates/hir-def/src/macro_expansion_tests/mbe/regression.rs b/crates/hir-def/src/macro_expansion_tests/mbe/regression.rs index 4aad53c3bd..252b0bb1b5 100644 --- a/crates/hir-def/src/macro_expansion_tests/mbe/regression.rs +++ b/crates/hir-def/src/macro_expansion_tests/mbe/regression.rs @@ -1058,7 +1058,7 @@ macro_rules! concat {} macro_rules! line {} fn main() { - "event 0u32"; + "event 0"; } "##]], @@ -1084,7 +1084,7 @@ fn main() { macro_rules! 
concat_bytes {} fn main() { - let x = /* error: unexpected token in input */[]; + let x = /* error: unexpected token in input */b""; } "#]], diff --git a/crates/hir-def/src/nameres/collector.rs b/crates/hir-def/src/nameres/collector.rs index f14679f6c2..13abcce78e 100644 --- a/crates/hir-def/src/nameres/collector.rs +++ b/crates/hir-def/src/nameres/collector.rs @@ -82,7 +82,7 @@ pub(super) fn collect_defs(db: &dyn DefDatabase, def_map: DefMap, tree_id: TreeI .iter() .enumerate() .map(|(idx, it)| { - let name = Name::new(&it.name, ctx); + let name = Name::new(&it.name, tt::IdentIsRaw::No, ctx); ( name, if !db.expand_proc_attr_macros() { @@ -2144,7 +2144,7 @@ impl ModCollector<'_, '_> { let name; let name = match attrs.by_key("rustc_builtin_macro").string_value_with_span() { Some((it, span)) => { - name = Name::new(it, span.ctx); + name = Name::new(it, tt::IdentIsRaw::No, span.ctx); &name } None => { diff --git a/crates/hir-expand/src/attrs.rs b/crates/hir-expand/src/attrs.rs index 36636a228f..4fce7c1fde 100644 --- a/crates/hir-expand/src/attrs.rs +++ b/crates/hir-expand/src/attrs.rs @@ -5,11 +5,14 @@ use base_db::CrateId; use cfg::CfgExpr; use either::Either; use intern::{sym, Interned}; -use mbe::{syntax_node_to_token_tree, DelimiterKind, DocCommentDesugarMode, Punct}; +use mbe::{ + desugar_doc_comment_text, syntax_node_to_token_tree, token_to_literal, DelimiterKind, + DocCommentDesugarMode, Punct, +}; use smallvec::{smallvec, SmallVec}; use span::{Span, SyntaxContextId}; use syntax::unescape; -use syntax::{ast, format_smolstr, match_ast, AstNode, AstToken, SmolStr, SyntaxNode}; +use syntax::{ast, match_ast, AstNode, AstToken, SyntaxNode}; use triomphe::ThinArc; use crate::name::Name; @@ -53,11 +56,15 @@ impl RawAttrs { } Either::Right(comment) => comment.doc_comment().map(|doc| { let span = span_map.span_for_range(comment.syntax().text_range()); + let (text, kind) = + desugar_doc_comment_text(doc, DocCommentDesugarMode::ProcMacro); Attr { id, input: Some(Box::new(AttrInput::Literal(tt::Literal { - text: SmolStr::new(format_smolstr!("\"{}\"", Self::escape_chars(doc))), + text, span, + kind, + suffix: None, }))), path: Interned::new(ModPath::from(Name::new_symbol( sym::doc.clone(), @@ -78,10 +85,6 @@ impl RawAttrs { RawAttrs { entries } } - fn escape_chars(s: &str) -> String { - s.replace('\\', r#"\\"#).replace('"', r#"\""#) - } - pub fn from_attrs_owner( db: &dyn ExpandDatabase, owner: InFile<&dyn ast::HasAttrs>, @@ -238,10 +241,8 @@ impl Attr { })?); let span = span_map.span_for_range(range); let input = if let Some(ast::Expr::Literal(lit)) = ast.expr() { - Some(Box::new(AttrInput::Literal(tt::Literal { - text: lit.token().text().into(), - span, - }))) + let token = lit.token(); + Some(Box::new(AttrInput::Literal(token_to_literal(token.text().into(), span)))) } else if let Some(tt) = ast.token_tree() { let tree = syntax_node_to_token_tree( tt.syntax(), @@ -310,12 +311,11 @@ impl Attr { /// #[path = "string"] pub fn string_value(&self) -> Option<&str> { match self.input.as_deref()? { - AttrInput::Literal(it) => match it.text.strip_prefix('r') { - Some(it) => it.trim_matches('#'), - None => it.text.as_str(), - } - .strip_prefix('"')? - .strip_suffix('"'), + AttrInput::Literal(tt::Literal { + text, + kind: tt::LitKind::Str | tt::LitKind::StrRaw(_), + .. + }) => Some(text), _ => None, } } @@ -336,12 +336,10 @@ impl Attr { pub fn string_value_unescape(&self) -> Option> { match self.input.as_deref()? 
{ - AttrInput::Literal(it) => match it.text.strip_prefix('r') { - Some(it) => { - it.trim_matches('#').strip_prefix('"')?.strip_suffix('"').map(Cow::Borrowed) - } - None => it.text.strip_prefix('"')?.strip_suffix('"').and_then(unescape), - }, + AttrInput::Literal(tt::Literal { text, kind: tt::LitKind::StrRaw(_), .. }) => { + Some(Cow::Borrowed(text)) + } + AttrInput::Literal(tt::Literal { text, kind: tt::LitKind::Str, .. }) => unescape(text), _ => None, } } diff --git a/crates/hir-expand/src/builtin_derive_macro.rs b/crates/hir-expand/src/builtin_derive_macro.rs index 269e9f308c..180d8f0562 100644 --- a/crates/hir-expand/src/builtin_derive_macro.rs +++ b/crates/hir-expand/src/builtin_derive_macro.rs @@ -370,7 +370,8 @@ fn name_to_token( ExpandError::other("missing name") })?; let span = token_map.span_at(name.syntax().text_range().start()); - let name_token = tt::Ident { span, text: name.text().into() }; + + let name_token = tt::Ident::new(name.text().as_ref(), span); Ok(name_token) } diff --git a/crates/hir-expand/src/builtin_fn_macro.rs b/crates/hir-expand/src/builtin_fn_macro.rs index 97867dfc66..32befb7a7f 100644 --- a/crates/hir-expand/src/builtin_fn_macro.rs +++ b/crates/hir-expand/src/builtin_fn_macro.rs @@ -1,13 +1,14 @@ //! Builtin macro +use ::tt::SmolStr; use base_db::{AnchoredPath, FileId}; use cfg::CfgExpr; use either::Either; use intern::sym; -use itertools::Itertools; use mbe::{parse_exprs_with_sep, parse_to_token_tree}; use span::{Edition, Span, SpanAnchor, SyntaxContextId, ROOT_ERASED_FILE_AST_ID}; -use syntax::ast::{self, AstToken}; +use stdx::format_to; +use syntax::unescape::{unescape_byte, unescape_char, unescape_unicode, Mode}; use crate::{ db::ExpandDatabase, @@ -177,8 +178,10 @@ fn line_expand( ExpandResult::ok(tt::Subtree { delimiter: tt::Delimiter::invisible_spanned(span), token_trees: Box::new([tt::TokenTree::Leaf(tt::Leaf::Literal(tt::Literal { - text: "0u32".into(), + text: "0".into(), span, + kind: tt::LitKind::Integer, + suffix: Some(Box::new("u32".into())), }))]), }) } @@ -444,27 +447,6 @@ fn use_panic_2021(db: &dyn ExpandDatabase, span: Span) -> bool { } } -fn unquote_str(lit: &tt::Literal) -> Option<(String, Span)> { - let span = lit.span; - let lit = ast::make::tokens::literal(&lit.to_string()); - let token = ast::String::cast(lit)?; - token.value().ok().map(|it| (it.into_owned(), span)) -} - -fn unquote_char(lit: &tt::Literal) -> Option<(char, Span)> { - let span = lit.span; - let lit = ast::make::tokens::literal(&lit.to_string()); - let token = ast::Char::cast(lit)?; - token.value().ok().zip(Some(span)) -} - -fn unquote_byte_string(lit: &tt::Literal) -> Option<(Vec, Span)> { - let span = lit.span; - let lit = ast::make::tokens::literal(&lit.to_string()); - let token = ast::ByteString::cast(lit)?; - token.value().ok().map(|it| (it.into_owned(), span)) -} - fn compile_error_expand( _db: &dyn ExpandDatabase, _id: MacroCallId, @@ -472,10 +454,16 @@ fn compile_error_expand( span: Span, ) -> ExpandResult { let err = match &*tt.token_trees { - [tt::TokenTree::Leaf(tt::Leaf::Literal(it))] => match unquote_str(it) { - Some((unquoted, _)) => ExpandError::other(unquoted.into_boxed_str()), - None => ExpandError::other("`compile_error!` argument must be a string"), - }, + [tt::TokenTree::Leaf(tt::Leaf::Literal(tt::Literal { + text, + span: _, + kind: tt::LitKind::Str | tt::LitKind::StrRaw(_), + suffix: _, + }))] => + // FIXME: Use the span here! 
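The hunks above replace `tt::Literal`'s single rendered `text` with an unquoted body plus an explicit `kind` and optional `suffix`, so consumers such as `AttrQuery::string_value` and `compile_error!` can pattern-match on the kind instead of stripping `r`, `#`, and quote characters by hand. A minimal sketch of that consumer-side pattern, using local stand-in types rather than the real `tt` definitions:

// Stand-ins mirroring the shapes introduced in the diff (illustrative only).
#[derive(Debug, Clone, Copy, PartialEq)]
enum LitKind {
    Str,
    StrRaw(u8), // number of `#`s in the raw delimiter
    Integer,
}

#[derive(Debug)]
struct Literal {
    text: String,           // body only: no quotes, hashes, or suffix
    kind: LitKind,
    suffix: Option<String>, // e.g. `u32` for `0u32`
}

/// New-style extraction: the kind already says it is a string, and the text is unquoted.
fn string_value(lit: &Literal) -> Option<&str> {
    match lit.kind {
        LitKind::Str | LitKind::StrRaw(_) => Some(&lit.text),
        _ => None,
    }
}

fn main() {
    // `r#"bar"#` used to be stored verbatim; now only the body `bar` is kept.
    let lit = Literal { text: "bar".into(), kind: LitKind::StrRaw(1), suffix: None };
    assert_eq!(string_value(&lit), Some("bar"));

    let int = Literal { text: "0".into(), kind: LitKind::Integer, suffix: Some("u32".into()) };
    assert_eq!(string_value(&int), None);
    assert_eq!(int.suffix.as_deref(), Some("u32"));
}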
+ { + ExpandError::other(Box::from(&*unescape_str(text))) + } _ => ExpandError::other("`compile_error!` argument must be a string"), }; @@ -507,20 +495,33 @@ fn concat_expand( } } } - match t { tt::TokenTree::Leaf(tt::Leaf::Literal(it)) if i % 2 == 0 => { // concat works with string and char literals, so remove any quotes. // It also works with integer, float and boolean literals, so just use the rest // as-is. - if let Some((c, span)) = unquote_char(it) { - text.push(c); - record_span(span); - } else { - let (component, span) = - unquote_str(it).unwrap_or_else(|| (it.text.to_string(), it.span)); - text.push_str(&component); - record_span(span); + match it.kind { + tt::LitKind::Char => { + if let Ok(c) = unescape_char(&it.text) { + text.extend(c.escape_default()); + } + record_span(it.span); + } + tt::LitKind::Integer | tt::LitKind::Float => format_to!(text, "{}", it.text), + tt::LitKind::Str => { + text.push_str(&it.text); + record_span(it.span); + } + tt::LitKind::StrRaw(_) => { + format_to!(text, "{}", it.text.escape_debug()); + record_span(it.span); + } + tt::LitKind::Byte + | tt::LitKind::ByteStr + | tt::LitKind::ByteStrRaw(_) + | tt::LitKind::CStr + | tt::LitKind::CStrRaw(_) + | tt::LitKind::Err(_) => err = Some(ExpandError::other("unexpected literal")), } } // handle boolean literals @@ -544,9 +545,9 @@ fn concat_bytes_expand( _db: &dyn ExpandDatabase, _arg_id: MacroCallId, tt: &tt::Subtree, - call_site: Span, + _: Span, ) -> ExpandResult { - let mut bytes = Vec::new(); + let mut bytes = String::new(); let mut err = None; let mut span: Option = None; let mut record_span = |s: Span| match &mut span { @@ -556,14 +557,21 @@ fn concat_bytes_expand( }; for (i, t) in tt.token_trees.iter().enumerate() { match t { - tt::TokenTree::Leaf(tt::Leaf::Literal(lit)) => { - let token = ast::make::tokens::literal(&lit.to_string()); - record_span(lit.span); - match token.kind() { - syntax::SyntaxKind::BYTE => bytes.push(token.text().to_owned()), - syntax::SyntaxKind::BYTE_STRING => { - let components = unquote_byte_string(lit).map_or(vec![], |(it, _)| it); - components.into_iter().for_each(|it| bytes.push(it.to_string())); + tt::TokenTree::Leaf(tt::Leaf::Literal(tt::Literal { text, span, kind, suffix: _ })) => { + record_span(*span); + match kind { + tt::LitKind::Byte => { + if let Ok(b) = unescape_byte(text) { + bytes.extend( + b.escape_ascii().filter_map(|it| char::from_u32(it as u32)), + ); + } + } + tt::LitKind::ByteStr => { + bytes.push_str(text); + } + tt::LitKind::ByteStrRaw(_) => { + bytes.extend(text.escape_debug()); } _ => { err.get_or_insert(mbe::ExpandError::UnexpectedToken.into()); @@ -584,51 +592,49 @@ fn concat_bytes_expand( } } } - let value = tt::Subtree { - delimiter: tt::Delimiter { - open: call_site, - close: call_site, - kind: tt::DelimiterKind::Bracket, + let span = span.unwrap_or(tt.delimiter.open); + ExpandResult { + value: tt::Subtree { + delimiter: tt::Delimiter::invisible_spanned(span), + token_trees: vec![tt::TokenTree::Leaf(tt::Leaf::Literal(tt::Literal { + text: bytes.into(), + span, + kind: tt::LitKind::ByteStr, + suffix: None, + }))] + .into(), }, - token_trees: { - Itertools::intersperse_with( - bytes.into_iter().map(|it| { - tt::TokenTree::Leaf(tt::Leaf::Literal(tt::Literal { - text: it.into(), - span: span.unwrap_or(call_site), - })) - }), - || { - tt::TokenTree::Leaf(tt::Leaf::Punct(tt::Punct { - char: ',', - spacing: tt::Spacing::Alone, - span: call_site, - })) - }, - ) - .collect() - }, - }; - ExpandResult { value, err } + err, + } } fn 
concat_bytes_expand_subtree( tree: &tt::Subtree, - bytes: &mut Vec, + bytes: &mut String, mut record_span: impl FnMut(Span), ) -> Result<(), ExpandError> { for (ti, tt) in tree.token_trees.iter().enumerate() { match tt { - tt::TokenTree::Leaf(tt::Leaf::Literal(it)) => { - let lit = ast::make::tokens::literal(&it.to_string()); - match lit.kind() { - syntax::SyntaxKind::BYTE | syntax::SyntaxKind::INT_NUMBER => { - record_span(it.span); - bytes.push(lit.text().to_owned()) - } - _ => { - return Err(mbe::ExpandError::UnexpectedToken.into()); - } + tt::TokenTree::Leaf(tt::Leaf::Literal(tt::Literal { + text, + span, + kind: tt::LitKind::Byte, + suffix: _, + })) => { + if let Ok(b) = unescape_byte(text) { + bytes.extend(b.escape_ascii().filter_map(|it| char::from_u32(it as u32))); + } + record_span(*span); + } + tt::TokenTree::Leaf(tt::Leaf::Literal(tt::Literal { + text, + span, + kind: tt::LitKind::Integer, + suffix: _, + })) => { + record_span(*span); + if let Ok(b) = text.parse::() { + bytes.extend(b.escape_ascii().filter_map(|it| char::from_u32(it as u32))); } } tt::TokenTree::Leaf(tt::Leaf::Punct(punct)) if ti % 2 == 1 && punct.char == ',' => (), @@ -660,7 +666,7 @@ fn concat_idents_expand( } } // FIXME merge spans - let ident = tt::Ident { text: ident.into(), span }; + let ident = tt::Ident { text: ident.into(), span, is_raw: tt::IdentIsRaw::No }; ExpandResult { value: quote!(span =>#ident), err } } @@ -683,11 +689,16 @@ fn relative_file( } } -fn parse_string(tt: &tt::Subtree) -> Result<(String, Span), ExpandError> { +fn parse_string(tt: &tt::Subtree) -> Result<(SmolStr, Span), ExpandError> { tt.token_trees .first() .and_then(|tt| match tt { - tt::TokenTree::Leaf(tt::Leaf::Literal(it)) => unquote_str(it), + tt::TokenTree::Leaf(tt::Leaf::Literal(tt::Literal { + text, + span, + kind: tt::LitKind::Str, + suffix: _, + })) => Some((unescape_str(text), *span)), _ => None, }) .ok_or(mbe::ExpandError::ConversionError.into()) @@ -738,6 +749,8 @@ fn include_bytes_expand( token_trees: Box::new([tt::TokenTree::Leaf(tt::Leaf::Literal(tt::Literal { text: r#"b"""#.into(), span, + kind: tt::LitKind::ByteStrRaw(1), + suffix: None, }))]), }; ExpandResult::ok(res) @@ -848,3 +861,17 @@ fn quote_expand( ExpandError::other("quote! 
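The rewritten `concat_expand`/`concat_bytes_expand` above stop round-tripping each argument through the AST: they branch on the literal kind, unescape only where needed (chars and bytes), re-escape raw strings, and emit a single output literal. A simplified, std-only sketch of that per-kind concatenation (the real code additionally records spans and reports errors):

enum Piece {
    Char(char),     // already-unescaped `'c'` argument
    Str(String),    // body of a `"..."` argument, kept as-is
    RawStr(String), // body of an `r"..."` argument, re-escaped on output
    Int(String),    // integer/float text, appended verbatim
    Bool(bool),
}

/// Build the body of the resulting string literal, mirroring the per-kind
/// handling in the diff's `concat_expand`.
fn concat(pieces: &[Piece]) -> String {
    let mut text = String::new();
    for p in pieces {
        match p {
            Piece::Char(c) => text.extend(c.escape_default()),
            Piece::Str(s) => text.push_str(s),
            Piece::RawStr(s) => text.extend(s.escape_debug()),
            Piece::Int(s) => text.push_str(s),
            Piece::Bool(b) => text.push_str(if *b { "true" } else { "false" }),
        }
    }
    text
}

fn main() {
    // Roughly `concat!("fo", "o", 0, r#""bar""#, false)`; compare the updated test expectation.
    let out = concat(&[
        Piece::Str("fo".into()),
        Piece::Str("o".into()),
        Piece::Int("0".into()),
        Piece::RawStr("\"bar\"".into()),
        Piece::Bool(false),
    ]);
    assert_eq!(out, "foo0\\\"bar\\\"false");
}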
is not implemented"), ) } + +fn unescape_str(s: &SmolStr) -> SmolStr { + if s.contains('\\') { + let mut buf = String::with_capacity(s.len()); + unescape_unicode(s, Mode::Str, &mut |_, c| { + if let Ok(c) = c { + buf.push(c) + } + }); + buf.into() + } else { + s.clone() + } +} diff --git a/crates/hir-expand/src/fixup.rs b/crates/hir-expand/src/fixup.rs index 9fdf4aa4f7..2896afed08 100644 --- a/crates/hir-expand/src/fixup.rs +++ b/crates/hir-expand/src/fixup.rs @@ -86,6 +86,7 @@ pub(crate) fn fixup_syntax( anchor: SpanAnchor { ast_id: FIXUP_DUMMY_AST_ID, ..span.anchor }, ctx: span.ctx, }, + is_raw: tt::IdentIsRaw::No, }); append.insert(node.clone().into(), vec![replacement]); preorder.skip_subtree(); @@ -101,6 +102,7 @@ pub(crate) fn fixup_syntax( Leaf::Ident(Ident { text: "__ra_fixup".into(), span: fake_span(node_range), + is_raw: tt::IdentIsRaw::No }), ]); } @@ -137,7 +139,8 @@ pub(crate) fn fixup_syntax( append.insert(if_token.into(), vec![ Leaf::Ident(Ident { text: "__ra_fixup".into(), - span: fake_span(node_range) + span: fake_span(node_range), + is_raw: tt::IdentIsRaw::No }), ]); } @@ -167,7 +170,8 @@ pub(crate) fn fixup_syntax( append.insert(while_token.into(), vec![ Leaf::Ident(Ident { text: "__ra_fixup".into(), - span: fake_span(node_range) + span: fake_span(node_range), + is_raw: tt::IdentIsRaw::No }), ]); } @@ -214,7 +218,8 @@ pub(crate) fn fixup_syntax( append.insert(match_token.into(), vec![ Leaf::Ident(Ident { text: "__ra_fixup".into(), - span: fake_span(node_range) + span: fake_span(node_range), + is_raw: tt::IdentIsRaw::No }), ]); } @@ -248,7 +253,8 @@ pub(crate) fn fixup_syntax( ].map(|text| Leaf::Ident(Ident { text: text.into(), - span: fake_span(node_range) + span: fake_span(node_range), + is_raw: tt::IdentIsRaw::No }), ); @@ -281,7 +287,8 @@ pub(crate) fn fixup_syntax( append.insert(colon.into(), vec![ Leaf::Ident(Ident { text: "__ra_fixup".into(), - span: fake_span(node_range) + span: fake_span(node_range), + is_raw: tt::IdentIsRaw::No }) ]); } @@ -293,7 +300,8 @@ pub(crate) fn fixup_syntax( append.insert(colon.into(), vec![ Leaf::Ident(Ident { text: "__ra_fixup".into(), - span: fake_span(node_range) + span: fake_span(node_range), + is_raw: tt::IdentIsRaw::No }) ]); } @@ -326,7 +334,8 @@ pub(crate) fn fixup_syntax( append.insert(node.into(), vec![ Leaf::Ident(Ident { text: "__ra_fixup".into(), - span: fake_span(node_range) + span: fake_span(node_range), + is_raw: tt::IdentIsRaw::No }) ]); } diff --git a/crates/hir-expand/src/lib.rs b/crates/hir-expand/src/lib.rs index e7c34e51e8..c4921da610 100644 --- a/crates/hir-expand/src/lib.rs +++ b/crates/hir-expand/src/lib.rs @@ -59,7 +59,7 @@ pub use span::{HirFileId, MacroCallId, MacroFileId}; pub mod tt { pub use span::Span; - pub use tt::{DelimiterKind, Spacing}; + pub use tt::{DelimiterKind, IdentIsRaw, LitKind, Spacing}; pub type Delimiter = ::tt::Delimiter; pub type DelimSpan = ::tt::DelimSpan; diff --git a/crates/hir-expand/src/mod_path.rs b/crates/hir-expand/src/mod_path.rs index ed7d551888..907e939153 100644 --- a/crates/hir-expand/src/mod_path.rs +++ b/crates/hir-expand/src/mod_path.rs @@ -316,15 +316,15 @@ fn convert_path_tt(db: &dyn ExpandDatabase, tt: &[tt::TokenTree]) -> Option PathKind::Abs, _ => return None, }, - tt::Leaf::Ident(tt::Ident { text, span }) if text == "$crate" => { + tt::Leaf::Ident(tt::Ident { text, span, .. }) if text == "$crate" => { resolve_crate_root(db, span.ctx).map(PathKind::DollarCrate).unwrap_or(PathKind::Crate) } tt::Leaf::Ident(tt::Ident { text, .. 
}) if text == "self" => PathKind::SELF, tt::Leaf::Ident(tt::Ident { text, .. }) if text == "super" => { let mut deg = 1; - while let Some(tt::Leaf::Ident(tt::Ident { text, span, .. })) = leaves.next() { + while let Some(tt::Leaf::Ident(tt::Ident { text, span, is_raw })) = leaves.next() { if text != "super" { - segments.push(Name::new(text, span.ctx)); + segments.push(Name::new(text, *is_raw, span.ctx)); break; } deg += 1; @@ -333,13 +333,13 @@ fn convert_path_tt(db: &dyn ExpandDatabase, tt: &[tt::TokenTree]) -> Option PathKind::Crate, tt::Leaf::Ident(ident) => { - segments.push(Name::new(&ident.text, ident.span.ctx)); + segments.push(Name::new(&ident.text, ident.is_raw, ident.span.ctx)); PathKind::Plain } _ => return None, }; segments.extend(leaves.filter_map(|leaf| match leaf { - ::tt::Leaf::Ident(ident) => Some(Name::new(&ident.text, ident.span.ctx)), + ::tt::Leaf::Ident(ident) => Some(Name::new(&ident.text, ident.is_raw, ident.span.ctx)), _ => None, })); Some(ModPath { kind, segments }) diff --git a/crates/hir-expand/src/name.rs b/crates/hir-expand/src/name.rs index 67e73f7fc2..fce9df6722 100644 --- a/crates/hir-expand/src/name.rs +++ b/crates/hir-expand/src/name.rs @@ -82,9 +82,16 @@ impl Name { Name { symbol: Symbol::intern(text), ctx: () } } - pub fn new(text: &str, ctx: SyntaxContextId) -> Name { + pub fn new(text: &str, raw: tt::IdentIsRaw, ctx: SyntaxContextId) -> Name { _ = ctx; - Name { symbol: Symbol::intern(text), ctx: () } + Name { + symbol: if raw.yes() { + Symbol::intern(&format_smolstr!("{}{text}", raw.as_str())) + } else { + Symbol::intern(text) + }, + ctx: (), + } } pub fn new_tuple_field(idx: usize) -> Name { diff --git a/crates/hir-expand/src/quote.rs b/crates/hir-expand/src/quote.rs index f4fc3b7b3e..f1d28450b3 100644 --- a/crates/hir-expand/src/quote.rs +++ b/crates/hir-expand/src/quote.rs @@ -3,12 +3,12 @@ use intern::Symbol; use span::Span; -use syntax::format_smolstr; +use tt::IdentIsRaw; use crate::name::Name; pub(crate) const fn dollar_crate(span: Span) -> tt::Ident { - tt::Ident { text: syntax::SmolStr::new_static("$crate"), span } + tt::Ident { text: syntax::SmolStr::new_static("$crate"), span, is_raw: tt::IdentIsRaw::No } } // A helper macro quote macro @@ -101,6 +101,7 @@ macro_rules! __quote { crate::tt::Leaf::Ident(crate::tt::Ident { text: stringify!($tt).into(), span: $span, + is_raw: tt::IdentIsRaw::No, }).into() }] }; @@ -209,23 +210,30 @@ macro_rules! impl_to_to_tokentrees { } impl_to_to_tokentrees! 
{ - span: u32 => self { crate::tt::Literal{text: self.to_string().into(), span} }; - span: usize => self { crate::tt::Literal{text: self.to_string().into(), span} }; - span: i32 => self { crate::tt::Literal{text: self.to_string().into(), span} }; - span: bool => self { crate::tt::Ident{text: self.to_string().into(), span} }; + span: u32 => self { crate::tt::Literal{text: self.to_string().into(), span, kind: tt::LitKind::Integer, suffix: None } }; + span: usize => self { crate::tt::Literal{text: self.to_string().into(), span, kind: tt::LitKind::Integer, suffix: None } }; + span: i32 => self { crate::tt::Literal{text: self.to_string().into(), span, kind: tt::LitKind::Integer, suffix: None } }; + span: bool => self { crate::tt::Ident{text: self.to_string().into(), span, is_raw: tt::IdentIsRaw::No } }; _span: crate::tt::Leaf => self { self }; _span: crate::tt::Literal => self { self }; _span: crate::tt::Ident => self { self }; _span: crate::tt::Punct => self { self }; - span: &str => self { crate::tt::Literal{text: format_smolstr!("\"{}\"", self.escape_default()), span}}; - span: String => self { crate::tt::Literal{text: format_smolstr!("\"{}\"", self.escape_default()), span}}; - span: Name => self { crate::tt::Ident{text: self.to_smol_str(), span}}; - span: Symbol => self { crate::tt::Ident{text: self.as_str().into(), span}}; + span: &str => self { crate::tt::Literal{text: (*self).into(), span, kind: tt::LitKind::Str, suffix: None }}; + span: String => self { crate::tt::Literal{text: self.into(), span, kind: tt::LitKind::Str, suffix: None }}; + span: Name => self { + let (is_raw, s) = IdentIsRaw::split_from_symbol(self.as_str()); + crate::tt::Ident{text: s.into(), span, is_raw } + }; + span: Symbol => self { + let (is_raw, s) = IdentIsRaw::split_from_symbol(self.as_str()); + crate::tt::Ident{text: s.into(), span, is_raw } + }; } #[cfg(test)] mod tests { use crate::tt; + use ::tt::IdentIsRaw; use base_db::FileId; use expect_test::expect; use span::{SpanAnchor, SyntaxContextId, ROOT_ERASED_FILE_AST_ID}; @@ -259,7 +267,8 @@ mod tests { } fn mk_ident(name: &str) -> crate::tt::Ident { - crate::tt::Ident { text: name.into(), span: DUMMY } + let (is_raw, s) = IdentIsRaw::split_from_symbol(name); + crate::tt::Ident { text: s.into(), span: DUMMY, is_raw } } #[test] diff --git a/crates/hir/src/attrs.rs b/crates/hir/src/attrs.rs index af60c233e5..02d92620e0 100644 --- a/crates/hir/src/attrs.rs +++ b/crates/hir/src/attrs.rs @@ -328,9 +328,11 @@ fn doc_modpath_from_str(link: &str) -> Option { }; let parts = first_segment.into_iter().chain(parts).map(|segment| match segment.parse() { Ok(idx) => Name::new_tuple_field(idx), - Err(_) => { - Name::new(segment.split_once('<').map_or(segment, |it| it.0), SyntaxContextId::ROOT) - } + Err(_) => Name::new( + segment.split_once('<').map_or(segment, |it| it.0), + tt::IdentIsRaw::No, + SyntaxContextId::ROOT, + ), }); Some(ModPath::from_segments(kind, parts)) }; diff --git a/crates/ide-db/src/documentation.rs b/crates/ide-db/src/documentation.rs index 58e77b95c3..1b9b78f691 100644 --- a/crates/ide-db/src/documentation.rs +++ b/crates/ide-db/src/documentation.rs @@ -269,12 +269,13 @@ fn get_doc_string_in_attr(it: &ast::Attr) -> Option { } fn doc_indent(attrs: &hir::Attrs) -> usize { - attrs - .by_key("doc") - .attrs() - .filter_map(|attr| attr.string_value()) // no need to use unescape version here - .flat_map(|s| s.lines()) - .filter_map(|line| line.chars().position(|c| !c.is_whitespace())) - .min() - .unwrap_or(0) + let mut min = !0; + for val in 
attrs.by_key("doc").attrs().filter_map(|attr| attr.string_value_unescape()) { + if let Some(m) = + val.lines().filter_map(|line| line.chars().position(|c| !c.is_whitespace())).min() + { + min = min.min(m); + } + } + min } diff --git a/crates/ide/src/hover.rs b/crates/ide/src/hover.rs index 2006baa30a..701374616a 100644 --- a/crates/ide/src/hover.rs +++ b/crates/ide/src/hover.rs @@ -240,7 +240,7 @@ fn hover_simple( .flatten() .unique_by(|&(def, _, _)| def) .map(|(def, macro_arm, node)| { - hover_for_definition(sema, file_id, def, &node, macro_arm, config) + dbg!(hover_for_definition(sema, file_id, def, &node, macro_arm, config)) }) .reduce(|mut acc: HoverResult, HoverResult { markup, actions }| { acc.actions.extend(actions); diff --git a/crates/mbe/Cargo.toml b/crates/mbe/Cargo.toml index 18444018e1..7ce8aadfb3 100644 --- a/crates/mbe/Cargo.toml +++ b/crates/mbe/Cargo.toml @@ -17,6 +17,7 @@ rustc-hash.workspace = true smallvec.workspace = true tracing.workspace = true arrayvec.workspace = true +ra-ap-rustc_lexer.workspace = true # local deps syntax.workspace = true diff --git a/crates/mbe/src/benchmark.rs b/crates/mbe/src/benchmark.rs index 27dbc84a2b..6a2f1c2368 100644 --- a/crates/mbe/src/benchmark.rs +++ b/crates/mbe/src/benchmark.rs @@ -226,13 +226,24 @@ fn invocation_fixtures( *seed } fn make_ident(ident: &str) -> tt::TokenTree { - tt::Leaf::Ident(tt::Ident { span: DUMMY, text: SmolStr::new(ident) }).into() + tt::Leaf::Ident(tt::Ident { + span: DUMMY, + text: SmolStr::new(ident), + is_raw: tt::IdentIsRaw::No, + }) + .into() } fn make_punct(char: char) -> tt::TokenTree { tt::Leaf::Punct(tt::Punct { span: DUMMY, char, spacing: tt::Spacing::Alone }).into() } fn make_literal(lit: &str) -> tt::TokenTree { - tt::Leaf::Literal(tt::Literal { span: DUMMY, text: SmolStr::new(lit) }).into() + tt::Leaf::Literal(tt::Literal { + span: DUMMY, + text: SmolStr::new(lit), + kind: tt::LitKind::Str, + suffix: None, + }) + .into() } fn make_subtree( kind: tt::DelimiterKind, diff --git a/crates/mbe/src/expander/transcriber.rs b/crates/mbe/src/expander/transcriber.rs index c09cbd1d07..e3359865cb 100644 --- a/crates/mbe/src/expander/transcriber.rs +++ b/crates/mbe/src/expander/transcriber.rs @@ -2,7 +2,7 @@ //! 
`$ident => foo`, interpolates variables in the template, to get `fn foo() {}` use span::Span; -use syntax::SmolStr; +use syntax::{format_smolstr, SmolStr}; use tt::Delimiter; use crate::{ @@ -99,6 +99,7 @@ impl Bindings { Fragment::Tokens(tt::TokenTree::Leaf(tt::Leaf::Ident(tt::Ident { text: SmolStr::new_static("missing"), span, + is_raw: tt::IdentIsRaw::No, }))) } MetaVarKind::Lifetime => { @@ -113,6 +114,7 @@ impl Bindings { tt::TokenTree::Leaf(tt::Leaf::Ident(tt::Ident { text: SmolStr::new_static("missing"), span, + is_raw: tt::IdentIsRaw::No, })), ]), })) @@ -121,6 +123,7 @@ impl Bindings { Fragment::Tokens(tt::TokenTree::Leaf(tt::Leaf::Ident(tt::Ident { text: SmolStr::new_static("\"missing\""), span, + is_raw: tt::IdentIsRaw::No, }))) } } @@ -236,8 +239,10 @@ fn expand_subtree( ctx.nesting.get(ctx.nesting.len() - 1 - depth).map_or(0, |nest| nest.idx); arena.push( tt::Leaf::Literal(tt::Literal { - text: index.to_string().into(), + text: format_smolstr!("{index}"), span: ctx.call_site, + kind: tt::LitKind::Integer, + suffix: None, }) .into(), ); @@ -249,8 +254,10 @@ fn expand_subtree( }); arena.push( tt::Leaf::Literal(tt::Literal { - text: length.to_string().into(), + text: format_smolstr!("{length}"), span: ctx.call_site, + kind: tt::LitKind::Integer, + suffix: None, }) .into(), ); @@ -314,8 +321,10 @@ fn expand_subtree( }; arena.push( tt::Leaf::Literal(tt::Literal { - text: c.to_string().into(), + text: format_smolstr!("{c}"), span: ctx.call_site, + suffix: None, + kind: tt::LitKind::Integer, }) .into(), ); @@ -363,7 +372,12 @@ fn expand_var( token_trees: Box::new([ tt::Leaf::from(tt::Punct { char: '$', spacing: tt::Spacing::Alone, span: id }) .into(), - tt::Leaf::from(tt::Ident { text: v.clone(), span: id }).into(), + tt::Leaf::from(tt::Ident { + text: v.clone(), + span: id, + is_raw: tt::IdentIsRaw::No, + }) + .into(), ]), } .into(); diff --git a/crates/mbe/src/lib.rs b/crates/mbe/src/lib.rs index b06c6cee12..8ab9269e95 100644 --- a/crates/mbe/src/lib.rs +++ b/crates/mbe/src/lib.rs @@ -6,6 +6,13 @@ //! The tests for this functionality live in another crate: //! `hir_def::macro_expansion_tests::mbe`. +#![cfg_attr(feature = "in-rust-tree", feature(rustc_private))] + +#[cfg(not(feature = "in-rust-tree"))] +extern crate ra_ap_rustc_lexer as rustc_lexer; +#[cfg(feature = "in-rust-tree")] +extern crate rustc_lexer; + mod expander; mod parser; mod syntax_bridge; @@ -27,9 +34,9 @@ pub use ::parser::TopEntryPoint; pub use tt::{Delimiter, DelimiterKind, Punct}; pub use crate::syntax_bridge::{ - parse_exprs_with_sep, parse_to_token_tree, parse_to_token_tree_static_span, - syntax_node_to_token_tree, syntax_node_to_token_tree_modified, token_tree_to_syntax_node, - DocCommentDesugarMode, SpanMapper, + desugar_doc_comment_text, parse_exprs_with_sep, parse_to_token_tree, + parse_to_token_tree_static_span, syntax_node_to_token_tree, syntax_node_to_token_tree_modified, + token_to_literal, token_tree_to_syntax_node, DocCommentDesugarMode, SpanMapper, }; pub use crate::syntax_bridge::dummy_test_span_utils::*; diff --git a/crates/mbe/src/parser.rs b/crates/mbe/src/parser.rs index 5c499c06b1..18af35c1e2 100644 --- a/crates/mbe/src/parser.rs +++ b/crates/mbe/src/parser.rs @@ -205,7 +205,11 @@ fn next_op( tt::TokenTree::Leaf(leaf) => match leaf { tt::Leaf::Ident(ident) if ident.text == "crate" => { // We simply produce identifier `$crate` here. And it will be resolved when lowering ast to Path. 
- Op::Ident(tt::Ident { text: "$crate".into(), span: ident.span }) + Op::Ident(tt::Ident { + text: "$crate".into(), + span: ident.span, + is_raw: tt::IdentIsRaw::No, + }) } tt::Leaf::Ident(ident) => { let kind = eat_fragment_kind(edition, src, mode)?; @@ -380,9 +384,11 @@ fn parse_metavar_expr(new_meta_vars: bool, src: &mut TtIter<'_, Span>) -> Result fn parse_depth(src: &mut TtIter<'_, Span>) -> Result { if src.len() == 0 { Ok(0) - } else if let tt::Leaf::Literal(lit) = src.expect_literal()? { + } else if let tt::Leaf::Literal(tt::Literal { text, suffix: None, .. }) = + src.expect_literal()? + { // Suffixes are not allowed. - lit.text.parse().map_err(|_| ()) + text.parse().map_err(|_| ()) } else { Err(()) } diff --git a/crates/mbe/src/syntax_bridge.rs b/crates/mbe/src/syntax_bridge.rs index 73a04f00d9..3feddba210 100644 --- a/crates/mbe/src/syntax_bridge.rs +++ b/crates/mbe/src/syntax_bridge.rs @@ -4,11 +4,11 @@ use std::fmt; use rustc_hash::{FxHashMap, FxHashSet}; use span::{Edition, SpanAnchor, SpanData, SpanMap}; -use stdx::{never, non_empty_vec::NonEmptyVec}; +use stdx::{format_to, itertools::Itertools, never, non_empty_vec::NonEmptyVec}; use syntax::{ ast::{self, make::tokens::doc_comment}, - AstToken, Parse, PreorderWithTokens, SmolStr, SyntaxElement, SyntaxKind, - SyntaxKind::*, + format_smolstr, AstToken, Parse, PreorderWithTokens, SmolStr, SyntaxElement, + SyntaxKind::{self, *}, SyntaxNode, SyntaxToken, SyntaxTreeBuilder, TextRange, TextSize, WalkEvent, T, }; use tt::{ @@ -317,18 +317,29 @@ where .into() } kind => { - macro_rules! make_leaf { - ($i:ident) => { - tt::$i { span: conv.span_for(abs_range), text: token.to_text(conv) } - .into() + macro_rules! make_ident { + () => { + tt::Ident { + span: conv.span_for(abs_range), + text: token.to_text(conv), + is_raw: tt::IdentIsRaw::No, + } + .into() }; } let leaf: tt::Leaf<_> = match kind { - T![true] | T![false] => make_leaf!(Ident), - IDENT => make_leaf!(Ident), - UNDERSCORE => make_leaf!(Ident), - k if k.is_keyword() => make_leaf!(Ident), - k if k.is_literal() => make_leaf!(Literal), + T![true] | T![false] => make_ident!(), + IDENT => { + let text = token.to_text(conv); + tt::Ident::new(text, conv.span_for(abs_range)).into() + } + UNDERSCORE => make_ident!(), + k if k.is_keyword() => make_ident!(), + k if k.is_literal() => { + let text = token.to_text(conv); + let span = conv.span_for(abs_range); + token_to_literal(text, span).into() + } LIFETIME_IDENT => { let apostrophe = tt::Leaf::from(tt::Punct { char: '\'', @@ -344,6 +355,7 @@ where abs_range.start() + TextSize::of('\''), abs_range.end(), )), + is_raw: tt::IdentIsRaw::No, }); token_trees.push(ident.into()); continue; @@ -388,6 +400,56 @@ where } } +pub fn token_to_literal(text: SmolStr, span: S) -> tt::Literal +where + S: Copy, +{ + use rustc_lexer::LiteralKind; + + let token = rustc_lexer::tokenize(&text).next_tuple(); + let Some((rustc_lexer::Token { + kind: rustc_lexer::TokenKind::Literal { kind, suffix_start }, + .. + },)) = token + else { + return tt::Literal { span, text, kind: tt::LitKind::Err(()), suffix: None }; + }; + + let (kind, start_offset, end_offset) = match kind { + LiteralKind::Int { .. } => (tt::LitKind::Integer, 0, 0), + LiteralKind::Float { .. 
} => (tt::LitKind::Float, 0, 0), + LiteralKind::Char { terminated } => (tt::LitKind::Char, 1, terminated as usize), + LiteralKind::Byte { terminated } => (tt::LitKind::Byte, 2, terminated as usize), + LiteralKind::Str { terminated } => (tt::LitKind::Str, 1, terminated as usize), + LiteralKind::ByteStr { terminated } => (tt::LitKind::ByteStr, 2, terminated as usize), + LiteralKind::CStr { terminated } => (tt::LitKind::CStr, 2, terminated as usize), + LiteralKind::RawStr { n_hashes } => ( + tt::LitKind::StrRaw(n_hashes.unwrap_or_default()), + 2 + n_hashes.unwrap_or_default() as usize, + 1 + n_hashes.unwrap_or_default() as usize, + ), + LiteralKind::RawByteStr { n_hashes } => ( + tt::LitKind::ByteStrRaw(n_hashes.unwrap_or_default()), + 3 + n_hashes.unwrap_or_default() as usize, + 1 + n_hashes.unwrap_or_default() as usize, + ), + LiteralKind::RawCStr { n_hashes } => ( + tt::LitKind::CStrRaw(n_hashes.unwrap_or_default()), + 3 + n_hashes.unwrap_or_default() as usize, + 1 + n_hashes.unwrap_or_default() as usize, + ), + }; + + let (lit, suffix) = text.split_at(suffix_start as usize); + let lit = &lit[start_offset..lit.len() - end_offset]; + let suffix = match suffix { + "" | "_" => None, + suffix => Some(Box::new(suffix.into())), + }; + + tt::Literal { span, text: lit.into(), kind, suffix } +} + fn is_single_token_op(kind: SyntaxKind) -> bool { matches!( kind, @@ -421,16 +483,10 @@ fn is_single_token_op(kind: SyntaxKind) -> bool { /// That is, strips leading `///` (or `/**`, etc) /// and strips the ending `*/` /// And then quote the string, which is needed to convert to `tt::Literal` -fn doc_comment_text(comment: &ast::Comment, mode: DocCommentDesugarMode) -> SmolStr { - let prefix_len = comment.prefix().len(); - let mut text = &comment.text()[prefix_len..]; - - // Remove ending "*/" - if comment.kind().shape == ast::CommentShape::Block { - text = &text[0..text.len() - 2]; - } - - let text = match mode { +/// +/// Note that proc-macros desugar with string literals where as macro_rules macros desugar with raw string literals. 
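The new `token_to_literal` above lets `rustc_lexer` classify the token once, then slices the body out of the original text: `start_offset` and `end_offset` drop the prefix and closing delimiter, and `suffix_start` splits off a trailing suffix such as `u16`. A hand-worked sketch of that slicing arithmetic, with the offsets hard-coded per case here, whereas the real function derives them from `LiteralKind`:

/// Slice a literal token into (body, suffix) given the offsets that the diff
/// computes from `rustc_lexer::LiteralKind`.
fn split(text: &str, start: usize, end: usize, suffix_start: usize) -> (&str, Option<&str>) {
    let (lit, suffix) = text.split_at(suffix_start);
    let body = &lit[start..lit.len() - end];
    let suffix = match suffix {
        "" | "_" => None,
        s => Some(s),
    };
    (body, suffix)
}

fn main() {
    // Integer: no delimiters, `suffix_start` points at the suffix.
    assert_eq!(split("1u16", 0, 0, 1), ("1", Some("u16")));
    // Plain string: strip one quote on each side.
    assert_eq!(split("\"hello\"", 1, 1, 7), ("hello", None));
    // Raw string with one `#`: prefix `r#"` (3 chars), closing `"#` (2 chars).
    assert_eq!(split("r#\"raw\"#", 3, 2, 8), ("raw", None));
    // Byte literal: prefix `b'` (2 chars), closing `'` (1 char).
    assert_eq!(split("b'b'", 2, 1, 4), ("b", None));
}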
+pub fn desugar_doc_comment_text(text: &str, mode: DocCommentDesugarMode) -> (SmolStr, tt::LitKind) { + match mode { DocCommentDesugarMode::Mbe => { let mut num_of_hashes = 0; let mut count = 0; @@ -444,14 +500,13 @@ fn doc_comment_text(comment: &ast::Comment, mode: DocCommentDesugarMode) -> Smol } // Quote raw string with delimiters - // Note that `tt::Literal` expect an escaped string - format!(r#"r{delim}"{text}"{delim}"#, delim = "#".repeat(num_of_hashes)) + (text.into(), tt::LitKind::StrRaw(num_of_hashes)) } // Quote string with delimiters - // Note that `tt::Literal` expect an escaped string - DocCommentDesugarMode::ProcMacro => format!(r#""{}""#, text.escape_debug()), - }; - text.into() + DocCommentDesugarMode::ProcMacro => { + (format_smolstr!("{}", text.escape_debug()), tt::LitKind::Str) + } + } } fn convert_doc_comment( @@ -463,8 +518,13 @@ fn convert_doc_comment( let comment = ast::Comment::cast(token.clone())?; let doc = comment.kind().doc?; - let mk_ident = - |s: &str| tt::TokenTree::from(tt::Leaf::from(tt::Ident { text: s.into(), span })); + let mk_ident = |s: &str| { + tt::TokenTree::from(tt::Leaf::from(tt::Ident { + text: s.into(), + span, + is_raw: tt::IdentIsRaw::No, + })) + }; let mk_punct = |c: char| { tt::TokenTree::from(tt::Leaf::from(tt::Punct { @@ -475,7 +535,15 @@ fn convert_doc_comment( }; let mk_doc_literal = |comment: &ast::Comment| { - let lit = tt::Literal { text: doc_comment_text(comment, mode), span }; + let prefix_len = comment.prefix().len(); + let mut text = &comment.text()[prefix_len..]; + + // Remove ending "*/" + if comment.kind().shape == ast::CommentShape::Block { + text = &text[0..text.len() - 2]; + } + let (text, kind) = desugar_doc_comment_text(text, mode); + let lit = tt::Literal { text, span, kind, suffix: None }; tt::TokenTree::from(tt::Leaf::from(lit)) }; @@ -902,16 +970,17 @@ fn delim_to_str(d: tt::DelimiterKind, closing: bool) -> Option<&'static str> { impl TtTreeSink<'_, Ctx> where - SpanData: Copy, + SpanData: Copy + fmt::Debug, { /// Parses a float literal as if it was a one to two name ref nodes with a dot inbetween. /// This occurs when a float literal is used as a field access. 
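`desugar_doc_comment_text` above now hands back the literal body together with a `LitKind`: MBE-style desugaring wraps the text in a raw string with enough `#`s to contain it (`StrRaw(n)`), while proc-macro desugaring escapes the body into a plain `Str`. A sketch of the hash-counting rule and the two resulting shapes, assuming the delimiter only needs to beat the longest quote-then-`#` run inside the text:

/// Count the `#`s needed so the text can sit inside a raw string literal:
/// track the longest run of `#`s that immediately follows a `"`.
fn raw_delimiter_hashes(text: &str) -> u32 {
    let (mut max, mut count) = (0u32, 0u32);
    for ch in text.chars() {
        count = match ch {
            '"' => 1,
            '#' if count > 0 => count + 1,
            _ => 0,
        };
        max = max.max(count);
    }
    max
}

fn main() {
    let doc = "outer docs may contain \"#\" sequences";
    let n = raw_delimiter_hashes(doc);
    assert_eq!(n, 2);
    // MBE mode: keep the text verbatim and remember StrRaw(n).
    let rendered = format!("r{h}\"{doc}\"{h}", h = "#".repeat(n as usize));
    assert!(rendered.starts_with("r##\""));
    // Proc-macro mode instead escapes the body and uses a plain Str literal.
    assert_eq!("a \"quoted\" doc".escape_debug().to_string(), r#"a \"quoted\" doc"#);
}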
fn float_split(&mut self, has_pseudo_dot: bool) { let (text, span) = match self.cursor.token_tree() { - Some(tt::buffer::TokenTreeRef::Leaf(tt::Leaf::Literal(lit), _)) => { - (lit.text.as_str(), lit.span) - } - _ => unreachable!(), + Some(tt::buffer::TokenTreeRef::Leaf( + tt::Leaf::Literal(tt::Literal { text, span, kind: tt::LitKind::Float, suffix: _ }), + _, + )) => (text.as_str(), *span), + tt => unreachable!("{tt:?}"), }; // FIXME: Span splitting match text.split_once('.') { @@ -954,7 +1023,7 @@ where } let mut last = self.cursor; - for _ in 0..n_tokens { + 'tokens: for _ in 0..n_tokens { let tmp: u8; if self.cursor.eof() { break; @@ -962,23 +1031,36 @@ where last = self.cursor; let (text, span) = loop { break match self.cursor.token_tree() { - Some(tt::buffer::TokenTreeRef::Leaf(leaf, _)) => { - // Mark the range if needed - let (text, span) = match leaf { - tt::Leaf::Ident(ident) => (ident.text.as_str(), ident.span), - tt::Leaf::Punct(punct) => { - assert!(punct.char.is_ascii()); - tmp = punct.char as u8; - ( - std::str::from_utf8(std::slice::from_ref(&tmp)).unwrap(), - punct.span, - ) + Some(tt::buffer::TokenTreeRef::Leaf(leaf, _)) => match leaf { + tt::Leaf::Ident(ident) => { + if ident.is_raw.yes() { + self.buf.push_str("r#"); + self.text_pos += TextSize::of("r#"); } - tt::Leaf::Literal(lit) => (lit.text.as_str(), lit.span), - }; - self.cursor = self.cursor.bump(); - (text, span) - } + let r = (ident.text.as_str(), ident.span); + self.cursor = self.cursor.bump(); + r + } + tt::Leaf::Punct(punct) => { + assert!(punct.char.is_ascii()); + tmp = punct.char as u8; + let r = ( + std::str::from_utf8(std::slice::from_ref(&tmp)).unwrap(), + punct.span, + ); + self.cursor = self.cursor.bump(); + r + } + tt::Leaf::Literal(lit) => { + let buf_l = self.buf.len(); + format_to!(self.buf, "{lit}"); + debug_assert_ne!(self.buf.len() - buf_l, 0); + self.text_pos += TextSize::new((self.buf.len() - buf_l) as u32); + self.token_map.push(self.text_pos, lit.span); + self.cursor = self.cursor.bump(); + continue 'tokens; + } + }, Some(tt::buffer::TokenTreeRef::Subtree(subtree, _)) => { self.cursor = self.cursor.subtree().unwrap(); match delim_to_str(subtree.delimiter.kind, false) { diff --git a/crates/mbe/src/to_parser_input.rs b/crates/mbe/src/to_parser_input.rs index 3f70149aa5..bf5494d371 100644 --- a/crates/mbe/src/to_parser_input.rs +++ b/crates/mbe/src/to_parser_input.rs @@ -35,20 +35,21 @@ pub(crate) fn to_parser_input(buffer: &TokenBuffer<'_, S>) Some(tt::buffer::TokenTreeRef::Leaf(leaf, _)) => { match leaf { tt::Leaf::Literal(lit) => { - let is_negated = lit.text.starts_with('-'); - let inner_text = &lit.text[if is_negated { 1 } else { 0 }..]; - - let kind = parser::LexedStr::single_token(inner_text) - .map(|(kind, _error)| kind) - .filter(|kind| { - kind.is_literal() - && (!is_negated || matches!(kind, FLOAT_NUMBER | INT_NUMBER)) - }) - .unwrap_or_else(|| panic!("Fail to convert given literal {:#?}", &lit)); - + let kind = match lit.kind { + tt::LitKind::Byte => SyntaxKind::BYTE, + tt::LitKind::Char => SyntaxKind::CHAR, + tt::LitKind::Integer => SyntaxKind::INT_NUMBER, + tt::LitKind::Float => SyntaxKind::FLOAT_NUMBER, + tt::LitKind::Str | tt::LitKind::StrRaw(_) => SyntaxKind::STRING, + tt::LitKind::ByteStr | tt::LitKind::ByteStrRaw(_) => { + SyntaxKind::BYTE_STRING + } + tt::LitKind::CStr | tt::LitKind::CStrRaw(_) => SyntaxKind::C_STRING, + tt::LitKind::Err(_) => SyntaxKind::ERROR, + }; res.push(kind); - if kind == FLOAT_NUMBER && !inner_text.ends_with('.') { + if kind == FLOAT_NUMBER && 
!lit.text.ends_with('.') { // Tag the token as joint if it is float with a fractional part // we use this jointness to inform the parser about what token split // event to emit when we encounter a float literal in a field access @@ -58,6 +59,7 @@ pub(crate) fn to_parser_input(buffer: &TokenBuffer<'_, S>) tt::Leaf::Ident(ident) => match ident.text.as_ref() { "_" => res.push(T![_]), i if i.starts_with('\'') => res.push(LIFETIME_IDENT), + _ if ident.is_raw.yes() => res.push(IDENT), _ => match SyntaxKind::from_keyword(&ident.text) { Some(kind) => res.push(kind), None => { diff --git a/crates/proc-macro-api/src/msg.rs b/crates/proc-macro-api/src/msg.rs index fa3ba9bbfc..b5f3d0c3aa 100644 --- a/crates/proc-macro-api/src/msg.rs +++ b/crates/proc-macro-api/src/msg.rs @@ -19,8 +19,10 @@ pub const VERSION_CHECK_VERSION: u32 = 1; pub const ENCODE_CLOSE_SPAN_VERSION: u32 = 2; pub const HAS_GLOBAL_SPANS: u32 = 3; pub const RUST_ANALYZER_SPAN_SUPPORT: u32 = 4; +/// Whether literals encode their kind as an additional u32 field and idents their rawness as a u32 field +pub const EXTENDED_LEAF_DATA: u32 = 5; -pub const CURRENT_API_VERSION: u32 = RUST_ANALYZER_SPAN_SUPPORT; +pub const CURRENT_API_VERSION: u32 = EXTENDED_LEAF_DATA; #[derive(Debug, Serialize, Deserialize)] pub enum Request { @@ -178,6 +180,7 @@ mod tests { anchor, ctx: SyntaxContextId::ROOT, }, + is_raw: tt::IdentIsRaw::No, } .into(), ), @@ -185,26 +188,28 @@ mod tests { Ident { text: "Foo".into(), span: Span { - range: TextRange::at(TextSize::new(5), TextSize::of("Foo")), + range: TextRange::at(TextSize::new(5), TextSize::of("r#Foo")), anchor, ctx: SyntaxContextId::ROOT, }, + is_raw: tt::IdentIsRaw::Yes, } .into(), ), TokenTree::Leaf(Leaf::Literal(Literal { - text: "Foo".into(), - + text: "\"Foo\"".into(), span: Span { - range: TextRange::at(TextSize::new(8), TextSize::of("Foo")), + range: TextRange::at(TextSize::new(10), TextSize::of("\"Foo\"")), anchor, ctx: SyntaxContextId::ROOT, }, + kind: tt::LitKind::Str, + suffix: None, })), TokenTree::Leaf(Leaf::Punct(Punct { char: '@', span: Span { - range: TextRange::at(TextSize::new(11), TextSize::of('@')), + range: TextRange::at(TextSize::new(13), TextSize::of('@')), anchor, ctx: SyntaxContextId::ROOT, }, @@ -213,18 +218,27 @@ mod tests { TokenTree::Subtree(Subtree { delimiter: Delimiter { open: Span { - range: TextRange::at(TextSize::new(12), TextSize::of('{')), + range: TextRange::at(TextSize::new(14), TextSize::of('{')), anchor, ctx: SyntaxContextId::ROOT, }, close: Span { - range: TextRange::at(TextSize::new(13), TextSize::of('}')), + range: TextRange::at(TextSize::new(19), TextSize::of('}')), anchor, ctx: SyntaxContextId::ROOT, }, kind: DelimiterKind::Brace, }, - token_trees: Box::new([]), + token_trees: Box::new([TokenTree::Leaf(Leaf::Literal(Literal { + text: "0".into(), + span: Span { + range: TextRange::at(TextSize::new(15), TextSize::of("0u32")), + anchor, + ctx: SyntaxContextId::ROOT, + }, + kind: tt::LitKind::Integer, + suffix: Some(Box::new("u32".into())), + }))]), }), ]); @@ -236,7 +250,7 @@ mod tests { ctx: SyntaxContextId::ROOT, }, close: Span { - range: TextRange::empty(TextSize::new(13)), + range: TextRange::empty(TextSize::new(19)), anchor, ctx: SyntaxContextId::ROOT, }, diff --git a/crates/proc-macro-api/src/msg/flat.rs b/crates/proc-macro-api/src/msg/flat.rs index 11fd7596f2..7f5afdb727 100644 --- a/crates/proc-macro-api/src/msg/flat.rs +++ b/crates/proc-macro-api/src/msg/flat.rs @@ -43,7 +43,7 @@ use serde::{Deserialize, Serialize}; use span::{ErasedFileAstId, FileId, Span, 
SpanAnchor, SyntaxContextId}; use text_size::TextRange; -use crate::msg::ENCODE_CLOSE_SPAN_VERSION; +use crate::msg::{ENCODE_CLOSE_SPAN_VERSION, EXTENDED_LEAF_DATA}; pub type SpanDataIndexMap = indexmap::IndexSet>; @@ -108,6 +108,8 @@ struct SubtreeRepr { struct LiteralRepr { id: TokenId, text: u32, + suffix: u32, + kind: u16, } struct PunctRepr { @@ -119,6 +121,7 @@ struct PunctRepr { struct IdentRepr { id: TokenId, text: u32, + is_raw: bool, } impl FlatTree { @@ -147,9 +150,17 @@ impl FlatTree { } else { write_vec(w.subtree, SubtreeRepr::write) }, - literal: write_vec(w.literal, LiteralRepr::write), + literal: if version >= EXTENDED_LEAF_DATA { + write_vec(w.literal, LiteralRepr::write_with_kind) + } else { + write_vec(w.literal, LiteralRepr::write) + }, punct: write_vec(w.punct, PunctRepr::write), - ident: write_vec(w.ident, IdentRepr::write), + ident: if version >= EXTENDED_LEAF_DATA { + write_vec(w.ident, IdentRepr::write_with_rawness) + } else { + write_vec(w.ident, IdentRepr::write) + }, token_tree: w.token_tree, text: w.text, } @@ -176,9 +187,17 @@ impl FlatTree { } else { write_vec(w.subtree, SubtreeRepr::write) }, - literal: write_vec(w.literal, LiteralRepr::write), + literal: if version >= EXTENDED_LEAF_DATA { + write_vec(w.literal, LiteralRepr::write_with_kind) + } else { + write_vec(w.literal, LiteralRepr::write) + }, punct: write_vec(w.punct, PunctRepr::write), - ident: write_vec(w.ident, IdentRepr::write), + ident: if version >= EXTENDED_LEAF_DATA { + write_vec(w.ident, IdentRepr::write_with_rawness) + } else { + write_vec(w.ident, IdentRepr::write) + }, token_tree: w.token_tree, text: w.text, } @@ -195,9 +214,17 @@ impl FlatTree { } else { read_vec(self.subtree, SubtreeRepr::read) }, - literal: read_vec(self.literal, LiteralRepr::read), + literal: if version >= EXTENDED_LEAF_DATA { + read_vec(self.literal, LiteralRepr::read_with_kind) + } else { + read_vec(self.literal, LiteralRepr::read) + }, punct: read_vec(self.punct, PunctRepr::read), - ident: read_vec(self.ident, IdentRepr::read), + ident: if version >= EXTENDED_LEAF_DATA { + read_vec(self.ident, IdentRepr::read_with_rawness) + } else { + read_vec(self.ident, IdentRepr::read) + }, token_tree: self.token_tree, text: self.text, span_data_table, @@ -212,9 +239,17 @@ impl FlatTree { } else { read_vec(self.subtree, SubtreeRepr::read) }, - literal: read_vec(self.literal, LiteralRepr::read), + literal: if version >= EXTENDED_LEAF_DATA { + read_vec(self.literal, LiteralRepr::read_with_kind) + } else { + read_vec(self.literal, LiteralRepr::read) + }, punct: read_vec(self.punct, PunctRepr::read), - ident: read_vec(self.ident, IdentRepr::read), + ident: if version >= EXTENDED_LEAF_DATA { + read_vec(self.ident, IdentRepr::read_with_rawness) + } else { + read_vec(self.ident, IdentRepr::read) + }, token_tree: self.token_tree, text: self.text, span_data_table: &(), @@ -280,14 +315,20 @@ impl LiteralRepr { [self.id.0, self.text] } fn read([id, text]: [u32; 2]) -> LiteralRepr { - LiteralRepr { id: TokenId(id), text } + LiteralRepr { id: TokenId(id), text, kind: 0, suffix: !0 } + } + fn write_with_kind(self) -> [u32; 4] { + [self.id.0, self.text, self.kind as u32, self.suffix] + } + fn read_with_kind([id, text, kind, suffix]: [u32; 4]) -> LiteralRepr { + LiteralRepr { id: TokenId(id), text, kind: kind as u16, suffix } } } impl PunctRepr { fn write(self) -> [u32; 3] { let spacing = match self.spacing { - tt::Spacing::Alone => 0, + tt::Spacing::Alone | tt::Spacing::JointHidden => 0, tt::Spacing::Joint => 1, }; [self.id.0, self.char as 
u32, spacing] @@ -307,7 +348,13 @@ impl IdentRepr { [self.id.0, self.text] } fn read(data: [u32; 2]) -> IdentRepr { - IdentRepr { id: TokenId(data[0]), text: data[1] } + IdentRepr { id: TokenId(data[0]), text: data[1], is_raw: false } + } + fn write_with_rawness(self) -> [u32; 3] { + [self.id.0, self.text, self.is_raw as u32] + } + fn read_with_rawness([id, text, is_raw]: [u32; 3]) -> IdentRepr { + IdentRepr { id: TokenId(id), text, is_raw: is_raw == 1 } } } @@ -380,7 +427,25 @@ impl<'a, 'span, S: InternableSpan> Writer<'a, 'span, S> { let idx = self.literal.len() as u32; let text = self.intern(&lit.text); let id = self.token_id_of(lit.span); - self.literal.push(LiteralRepr { id, text }); + let suffix = lit.suffix.as_ref().map(|s| self.intern(s)).unwrap_or(!0); + self.literal.push(LiteralRepr { + id, + text, + kind: u16::from_le_bytes(match lit.kind { + tt::LitKind::Err(_) => [0, 0], + tt::LitKind::Byte => [1, 0], + tt::LitKind::Char => [2, 0], + tt::LitKind::Integer => [3, 0], + tt::LitKind::Float => [4, 0], + tt::LitKind::Str => [5, 0], + tt::LitKind::StrRaw(r) => [6, r], + tt::LitKind::ByteStr => [7, 0], + tt::LitKind::ByteStrRaw(r) => [8, r], + tt::LitKind::CStr => [9, 0], + tt::LitKind::CStrRaw(r) => [10, r], + }), + suffix, + }); idx << 2 | 0b01 } tt::Leaf::Punct(punct) => { @@ -393,7 +458,11 @@ impl<'a, 'span, S: InternableSpan> Writer<'a, 'span, S> { let idx = self.ident.len() as u32; let text = self.intern(&ident.text); let id = self.token_id_of(ident.span); - self.ident.push(IdentRepr { id, text }); + self.ident.push(IdentRepr { + id, + text, + is_raw: ident.is_raw == tt::IdentIsRaw::Yes, + }); idx << 2 | 0b11 } }, @@ -457,10 +526,32 @@ impl<'span, S: InternableSpan> Reader<'span, S> { // that this unwrap doesn't fire. 0b00 => res[idx].take().unwrap().into(), 0b01 => { + use tt::LitKind::*; let repr = &self.literal[idx]; tt::Leaf::Literal(tt::Literal { text: self.text[repr.text as usize].as_str().into(), span: read_span(repr.id), + kind: match u16::to_le_bytes(repr.kind) { + [0, _] => Err(()), + [1, _] => Byte, + [2, _] => Char, + [3, _] => Integer, + [4, _] => Float, + [5, _] => Str, + [6, r] => StrRaw(r), + [7, _] => ByteStr, + [8, r] => ByteStrRaw(r), + [9, _] => CStr, + [10, r] => CStrRaw(r), + _ => unreachable!(), + }, + suffix: if repr.suffix != !0 { + Some(Box::new( + self.text[repr.suffix as usize].as_str().into(), + )) + } else { + None + }, }) .into() } @@ -478,6 +569,11 @@ impl<'span, S: InternableSpan> Reader<'span, S> { tt::Leaf::Ident(tt::Ident { text: self.text[repr.text as usize].as_str().into(), span: read_span(repr.id), + is_raw: if repr.is_raw { + tt::IdentIsRaw::Yes + } else { + tt::IdentIsRaw::No + }, }) .into() } diff --git a/crates/proc-macro-srv/proc-macro-test/imp/src/lib.rs b/crates/proc-macro-srv/proc-macro-test/imp/src/lib.rs index a1707364f3..749a776059 100644 --- a/crates/proc-macro-srv/proc-macro-test/imp/src/lib.rs +++ b/crates/proc-macro-srv/proc-macro-test/imp/src/lib.rs @@ -1,6 +1,5 @@ //! Exports a few trivial procedural macros for testing. 
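On the proc-macro wire format (guarded by the new `EXTENDED_LEAF_DATA` version above), the literal kind is packed into one `u16`: the low byte is a tag, the high byte carries the `#` count of the raw kinds, and a missing suffix is encoded as the sentinel index `!0`. A round-trip sketch of that packing with a stand-in enum covering a subset of the tags:

#[derive(Debug, Clone, Copy, PartialEq)]
enum LitKind {
    Err,
    Integer,
    Str,
    StrRaw(u8), // number of `#`s in the delimiter
}

fn encode(kind: LitKind) -> u16 {
    // Same scheme as the diff: [tag, extra] interpreted as little-endian bytes.
    u16::from_le_bytes(match kind {
        LitKind::Err => [0, 0],
        LitKind::Integer => [3, 0],
        LitKind::Str => [5, 0],
        LitKind::StrRaw(r) => [6, r],
    })
}

fn decode(kind: u16) -> LitKind {
    match u16::to_le_bytes(kind) {
        [0, _] => LitKind::Err,
        [3, _] => LitKind::Integer,
        [5, _] => LitKind::Str,
        [6, r] => LitKind::StrRaw(r),
        _ => LitKind::Err, // the real reader treats unknown tags as unreachable
    }
}

fn main() {
    for kind in [LitKind::Integer, LitKind::Str, LitKind::StrRaw(2)] {
        assert_eq!(decode(encode(kind)), kind);
    }
    // Suffix slot: `!0` means "no suffix", anything else indexes the interned text table.
    let no_suffix: u32 = !0;
    assert_eq!(no_suffix, u32::MAX);
}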
- #![feature(proc_macro_span, proc_macro_def_site)] #![allow(clippy::all)] diff --git a/crates/proc-macro-srv/src/server_impl.rs b/crates/proc-macro-srv/src/server_impl.rs index e8b340a43d..68e0f85978 100644 --- a/crates/proc-macro-srv/src/server_impl.rs +++ b/crates/proc-macro-srv/src/server_impl.rs @@ -49,58 +49,39 @@ fn spacing_to_internal(spacing: proc_macro::Spacing) -> Spacing { #[allow(unused)] fn spacing_to_external(spacing: Spacing) -> proc_macro::Spacing { match spacing { - Spacing::Alone => proc_macro::Spacing::Alone, + Spacing::Alone | Spacing::JointHidden => proc_macro::Spacing::Alone, Spacing::Joint => proc_macro::Spacing::Joint, } } -/// Invokes the callback with a `&[&str]` consisting of each part of the -/// literal's representation. This is done to allow the `ToString` and -/// `Display` implementations to borrow references to symbol values, and -/// both be optimized to reduce overhead. -fn literal_with_stringify_parts( - literal: &bridge::Literal, - interner: SymbolInternerRef, - f: impl FnOnce(&[&str]) -> R, -) -> R { - /// Returns a string containing exactly `num` '#' characters. - /// Uses a 256-character source string literal which is always safe to - /// index with a `u8` index. - fn get_hashes_str(num: u8) -> &'static str { - const HASHES: &str = "\ - ################################################################\ - ################################################################\ - ################################################################\ - ################################################################\ - "; - const _: () = assert!(HASHES.len() == 256); - &HASHES[..num as usize] - } - - { - let symbol = &*literal.symbol.text(interner); - let suffix = &*literal.suffix.map(|s| s.text(interner)).unwrap_or_default(); - match literal.kind { - bridge::LitKind::Byte => f(&["b'", symbol, "'", suffix]), - bridge::LitKind::Char => f(&["'", symbol, "'", suffix]), - bridge::LitKind::Str => f(&["\"", symbol, "\"", suffix]), - bridge::LitKind::StrRaw(n) => { - let hashes = get_hashes_str(n); - f(&["r", hashes, "\"", symbol, "\"", hashes, suffix]) - } - bridge::LitKind::ByteStr => f(&["b\"", symbol, "\"", suffix]), - bridge::LitKind::ByteStrRaw(n) => { - let hashes = get_hashes_str(n); - f(&["br", hashes, "\"", symbol, "\"", hashes, suffix]) - } - bridge::LitKind::CStr => f(&["c\"", symbol, "\"", suffix]), - bridge::LitKind::CStrRaw(n) => { - let hashes = get_hashes_str(n); - f(&["cr", hashes, "\"", symbol, "\"", hashes, suffix]) - } - bridge::LitKind::Integer | bridge::LitKind::Float | bridge::LitKind::ErrWithGuar => { - f(&[symbol, suffix]) - } - } +fn literal_kind_to_external(kind: tt::LitKind) -> bridge::LitKind { + match kind { + tt::LitKind::Byte => bridge::LitKind::Byte, + tt::LitKind::Char => bridge::LitKind::Char, + tt::LitKind::Integer => bridge::LitKind::Integer, + tt::LitKind::Float => bridge::LitKind::Float, + tt::LitKind::Str => bridge::LitKind::Str, + tt::LitKind::StrRaw(r) => bridge::LitKind::StrRaw(r), + tt::LitKind::ByteStr => bridge::LitKind::ByteStr, + tt::LitKind::ByteStrRaw(r) => bridge::LitKind::ByteStrRaw(r), + tt::LitKind::CStr => bridge::LitKind::CStr, + tt::LitKind::CStrRaw(r) => bridge::LitKind::CStrRaw(r), + tt::LitKind::Err(_) => bridge::LitKind::ErrWithGuar, + } +} + +fn literal_kind_to_internal(kind: bridge::LitKind) -> tt::LitKind { + match kind { + bridge::LitKind::Byte => tt::LitKind::Byte, + bridge::LitKind::Char => tt::LitKind::Char, + bridge::LitKind::Str => tt::LitKind::Str, + bridge::LitKind::StrRaw(r) => 
tt::LitKind::StrRaw(r), + bridge::LitKind::ByteStr => tt::LitKind::ByteStr, + bridge::LitKind::ByteStrRaw(r) => tt::LitKind::ByteStrRaw(r), + bridge::LitKind::CStr => tt::LitKind::CStr, + bridge::LitKind::CStrRaw(r) => tt::LitKind::CStrRaw(r), + bridge::LitKind::Integer => tt::LitKind::Integer, + bridge::LitKind::Float => tt::LitKind::Float, + bridge::LitKind::ErrWithGuar => tt::LitKind::Err(()), } } diff --git a/crates/proc-macro-srv/src/server_impl/rust_analyzer_span.rs b/crates/proc-macro-srv/src/server_impl/rust_analyzer_span.rs index bb174ba1b2..1a71f39612 100644 --- a/crates/proc-macro-srv/src/server_impl/rust_analyzer_span.rs +++ b/crates/proc-macro-srv/src/server_impl/rust_analyzer_span.rs @@ -15,7 +15,7 @@ use span::{Span, FIXUP_ERASED_FILE_AST_ID_MARKER}; use tt::{TextRange, TextSize}; use crate::server_impl::{ - delim_to_external, delim_to_internal, literal_with_stringify_parts, + delim_to_external, delim_to_internal, literal_kind_to_external, literal_kind_to_internal, token_stream::TokenStreamBuilder, Symbol, SymbolInternerRef, SYMBOL_INTERNER, }; mod tt { @@ -171,20 +171,24 @@ impl server::TokenStream for RaSpanServer { bridge::TokenTree::Ident(ident) => { let text = ident.sym.text(self.interner); - let text = - if ident.is_raw { ::tt::SmolStr::from_iter(["r#", &text]) } else { text }; - let ident: tt::Ident = tt::Ident { text, span: ident.span }; + let ident: tt::Ident = tt::Ident { + text, + span: ident.span, + is_raw: if ident.is_raw { tt::IdentIsRaw::Yes } else { tt::IdentIsRaw::No }, + }; let leaf = tt::Leaf::from(ident); let tree = tt::TokenTree::from(leaf); Self::TokenStream::from_iter(iter::once(tree)) } bridge::TokenTree::Literal(literal) => { - let text = literal_with_stringify_parts(&literal, self.interner, |parts| { - ::tt::SmolStr::from_iter(parts.iter().copied()) - }); + let literal = tt::Literal { + text: literal.symbol.text(self.interner), + suffix: literal.suffix.map(|it| Box::new(it.text(self.interner))), + span: literal.span, + kind: literal_kind_to_internal(literal.kind), + }; - let literal = tt::Literal { text, span: literal.span }; let leaf: tt::Leaf = tt::Leaf::from(literal); let tree = tt::TokenTree::from(leaf); Self::TokenStream::from_iter(iter::once(tree)) @@ -250,23 +254,18 @@ impl server::TokenStream for RaSpanServer { .into_iter() .map(|tree| match tree { tt::TokenTree::Leaf(tt::Leaf::Ident(ident)) => { - bridge::TokenTree::Ident(match ident.text.strip_prefix("r#") { - Some(text) => bridge::Ident { - sym: Symbol::intern(self.interner, text), - is_raw: true, - span: ident.span, - }, - None => bridge::Ident { - sym: Symbol::intern(self.interner, &ident.text), - is_raw: false, - span: ident.span, - }, + bridge::TokenTree::Ident(bridge::Ident { + sym: Symbol::intern(self.interner, &ident.text), + is_raw: ident.is_raw.yes(), + span: ident.span, }) } tt::TokenTree::Leaf(tt::Leaf::Literal(lit)) => { bridge::TokenTree::Literal(bridge::Literal { span: lit.span, - ..server::FreeFunctions::literal_from_str(self, &lit.text).unwrap() + kind: literal_kind_to_external(lit.kind), + symbol: Symbol::intern(self.interner, &lit.text), + suffix: lit.suffix.map(|it| Symbol::intern(self.interner, &it)), }) } tt::TokenTree::Leaf(tt::Leaf::Punct(punct)) => { diff --git a/crates/proc-macro-srv/src/server_impl/token_id.rs b/crates/proc-macro-srv/src/server_impl/token_id.rs index 12edacbe39..94d5748b08 100644 --- a/crates/proc-macro-srv/src/server_impl/token_id.rs +++ b/crates/proc-macro-srv/src/server_impl/token_id.rs @@ -8,7 +8,7 @@ use std::{ use proc_macro::bridge::{self, 
server}; use crate::server_impl::{ - delim_to_external, delim_to_internal, literal_with_stringify_parts, + delim_to_external, delim_to_internal, literal_kind_to_external, literal_kind_to_internal, token_stream::TokenStreamBuilder, Symbol, SymbolInternerRef, SYMBOL_INTERNER, }; mod tt { @@ -25,10 +25,8 @@ mod tt { } type Group = tt::Subtree; type TokenTree = tt::TokenTree; -#[allow(unused)] type Punct = tt::Punct; type Spacing = tt::Spacing; -#[allow(unused)] type Literal = tt::Literal; type Span = tt::TokenId; type TokenStream = crate::server_impl::TokenStream; @@ -162,20 +160,23 @@ impl server::TokenStream for TokenIdServer { bridge::TokenTree::Ident(ident) => { let text = ident.sym.text(self.interner); - let text = - if ident.is_raw { ::tt::SmolStr::from_iter(["r#", &text]) } else { text }; - let ident: tt::Ident = tt::Ident { text, span: ident.span }; + let ident: tt::Ident = tt::Ident { + text, + span: ident.span, + is_raw: if ident.is_raw { tt::IdentIsRaw::Yes } else { tt::IdentIsRaw::No }, + }; let leaf = tt::Leaf::from(ident); let tree = TokenTree::from(leaf); Self::TokenStream::from_iter(iter::once(tree)) } bridge::TokenTree::Literal(literal) => { - let text = literal_with_stringify_parts(&literal, self.interner, |parts| { - ::tt::SmolStr::from_iter(parts.iter().copied()) - }); - - let literal = tt::Literal { text, span: literal.span }; + let literal = Literal { + text: literal.symbol.text(self.interner), + suffix: literal.suffix.map(|it| Box::new(it.text(self.interner))), + span: literal.span, + kind: literal_kind_to_internal(literal.kind), + }; let leaf = tt::Leaf::from(literal); let tree = TokenTree::from(leaf); @@ -183,7 +184,7 @@ impl server::TokenStream for TokenIdServer { } bridge::TokenTree::Punct(p) => { - let punct = tt::Punct { + let punct = Punct { char: p.ch as char, spacing: if p.joint { Spacing::Joint } else { Spacing::Alone }, span: p.span, @@ -238,16 +239,17 @@ impl server::TokenStream for TokenIdServer { .map(|tree| match tree { tt::TokenTree::Leaf(tt::Leaf::Ident(ident)) => { bridge::TokenTree::Ident(bridge::Ident { - sym: Symbol::intern(self.interner, ident.text.trim_start_matches("r#")), - is_raw: ident.text.starts_with("r#"), + sym: Symbol::intern(self.interner, &ident.text), + is_raw: ident.is_raw.yes(), span: ident.span, }) } tt::TokenTree::Leaf(tt::Leaf::Literal(lit)) => { bridge::TokenTree::Literal(bridge::Literal { span: lit.span, - ..server::FreeFunctions::literal_from_str(self, &lit.text) - .unwrap_or_else(|_| panic!("`{}`", lit.text)) + kind: literal_kind_to_external(lit.kind), + symbol: Symbol::intern(self.interner, &lit.text), + suffix: lit.suffix.map(|it| Symbol::intern(self.interner, &it)), }) } tt::TokenTree::Leaf(tt::Leaf::Punct(punct)) => { @@ -383,10 +385,12 @@ mod tests { tt::TokenTree::Leaf(tt::Leaf::Ident(tt::Ident { text: "struct".into(), span: tt::TokenId(0), + is_raw: tt::IdentIsRaw::No, })), tt::TokenTree::Leaf(tt::Leaf::Ident(tt::Ident { text: "T".into(), span: tt::TokenId(0), + is_raw: tt::IdentIsRaw::No, })), tt::TokenTree::Subtree(tt::Subtree { delimiter: tt::Delimiter { @@ -411,6 +415,7 @@ mod tests { kind: tt::DelimiterKind::Parenthesis, }, token_trees: Box::new([tt::TokenTree::Leaf(tt::Leaf::Ident(tt::Ident { + is_raw: tt::IdentIsRaw::No, text: "a".into(), span: tt::TokenId(0), }))]), @@ -430,6 +435,7 @@ mod tests { tt::TokenTree::Leaf(tt::Leaf::Ident(tt::Ident { text: "_".into(), span: tt::TokenId(0), + is_raw: tt::IdentIsRaw::No, })) ); } diff --git a/crates/proc-macro-srv/src/tests/mod.rs 
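Aside (not part of the patch): both proc-macro server backends now carry the bridge literal's kind, symbol, and suffix straight into `tt::Literal` instead of stringifying it and re-lexing with `literal_from_str`. A minimal, self-contained sketch of that shape, using toy types rather than the crates' real definitions (the "symbol + separate suffix" split for `1u16` is an assumption about the bridge representation):

    // Toy mirror of the new conversion: the bridge side keeps the literal's
    // unescaped symbol, optional suffix, and kind separately, and the server
    // now just moves those fields across instead of formatting a source string.
    #[allow(dead_code)]
    #[derive(Debug, Clone, Copy, PartialEq)]
    enum LitKind { Integer, Float, Str }

    #[allow(dead_code)]
    struct BridgeLiteral { kind: LitKind, symbol: String, suffix: Option<String> }

    #[allow(dead_code)]
    struct TtLiteral { text: String, suffix: Option<Box<String>>, kind: LitKind }

    fn to_internal(lit: BridgeLiteral) -> TtLiteral {
        TtLiteral {
            text: lit.symbol,
            suffix: lit.suffix.map(Box::new),
            kind: lit.kind,
        }
    }

    fn main() {
        // Assumed bridge split: `1u16` arrives as symbol "1" plus suffix "u16".
        let lit = to_internal(BridgeLiteral {
            kind: LitKind::Integer,
            symbol: "1".into(),
            suffix: Some("u16".into()),
        });
        assert_eq!(lit.text, "1");
        assert_eq!(lit.suffix.as_deref().map(|s| s.as_str()), Some("u16"));
    }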
b/crates/proc-macro-srv/src/tests/mod.rs index 6334282538..dc6e71163b 100644 --- a/crates/proc-macro-srv/src/tests/mod.rs +++ b/crates/proc-macro-srv/src/tests/mod.rs @@ -21,20 +21,20 @@ fn test_derive_error() { assert_expand( "DeriveError", r#"struct S;"#, - expect![[r##" + expect![[r#" SUBTREE $$ 1 1 IDENT compile_error 1 PUNCH ! [alone] 1 SUBTREE () 1 1 - LITERAL "#[derive(DeriveError)] struct S ;"1 - PUNCH ; [alone] 1"##]], - expect![[r##" + LITERAL Str #[derive(DeriveError)] struct S ; 1 + PUNCH ; [alone] 1"#]], + expect![[r#" SUBTREE $$ 42:2@0..100#0 42:2@0..100#0 IDENT compile_error 42:2@0..100#0 PUNCH ! [alone] 42:2@0..100#0 SUBTREE () 42:2@0..100#0 42:2@0..100#0 - LITERAL "#[derive(DeriveError)] struct S ;"42:2@0..100#0 - PUNCH ; [alone] 42:2@0..100#0"##]], + LITERAL Str #[derive(DeriveError)] struct S ; 42:2@0..100#0 + PUNCH ; [alone] 42:2@0..100#0"#]], ); } @@ -47,18 +47,18 @@ fn test_fn_like_macro_noop() { SUBTREE $$ 1 1 IDENT ident 1 PUNCH , [alone] 1 - LITERAL 01 + LITERAL Integer 0 1 PUNCH , [alone] 1 - LITERAL 11 + LITERAL Integer 1 1 PUNCH , [alone] 1 SUBTREE [] 1 1"#]], expect![[r#" SUBTREE $$ 42:2@0..100#0 42:2@0..100#0 IDENT ident 42:2@0..5#0 PUNCH , [alone] 42:2@5..6#0 - LITERAL 042:2@7..8#0 + LITERAL Integer 0 42:2@7..8#0 PUNCH , [alone] 42:2@8..9#0 - LITERAL 142:2@10..11#0 + LITERAL Integer 1 42:2@10..11#0 PUNCH , [alone] 42:2@11..12#0 SUBTREE [] 42:2@13..14#0 42:2@14..15#0"#]], ); @@ -135,22 +135,22 @@ fn test_fn_like_mk_literals() { r#""#, expect![[r#" SUBTREE $$ 1 1 - LITERAL b"byte_string"1 - LITERAL 'c'1 - LITERAL "string"1 - LITERAL 3.14f641 - LITERAL 3.141 - LITERAL 123i641 - LITERAL 1231"#]], + LITERAL ByteStr byte_string 1 + LITERAL Char c 1 + LITERAL Str string 1 + LITERAL Float 3.14f64 1 + LITERAL Float 3.14 1 + LITERAL Integer 123i64 1 + LITERAL Integer 123 1"#]], expect![[r#" SUBTREE $$ 42:2@0..100#0 42:2@0..100#0 - LITERAL b"byte_string"42:2@0..100#0 - LITERAL 'c'42:2@0..100#0 - LITERAL "string"42:2@0..100#0 - LITERAL 3.14f6442:2@0..100#0 - LITERAL 3.1442:2@0..100#0 - LITERAL 123i6442:2@0..100#0 - LITERAL 12342:2@0..100#0"#]], + LITERAL ByteStr byte_string 42:2@0..100#0 + LITERAL Char c 42:2@0..100#0 + LITERAL Str string 42:2@0..100#0 + LITERAL Float 3.14f64 42:2@0..100#0 + LITERAL Float 3.14 42:2@0..100#0 + LITERAL Integer 123i64 42:2@0..100#0 + LITERAL Integer 123 42:2@0..100#0"#]], ); } @@ -175,50 +175,50 @@ fn test_fn_like_macro_clone_literals() { assert_expand( "fn_like_clone_tokens", r###"1u16, 2_u32, -4i64, 3.14f32, "hello bridge", "suffixed"suffix, r##"raw"##, 'a', b'b', c"null""###, - expect![[r###" + expect![[r#" SUBTREE $$ 1 1 - LITERAL 1u161 + LITERAL Integer 1u16 1 PUNCH , [alone] 1 - LITERAL 2_u321 + LITERAL Integer 2_u32 1 PUNCH , [alone] 1 PUNCH - [alone] 1 - LITERAL 4i641 + LITERAL Integer 4i64 1 PUNCH , [alone] 1 - LITERAL 3.14f321 + LITERAL Float 3.14f32 1 PUNCH , [alone] 1 - LITERAL "hello bridge"1 + LITERAL Str hello bridge 1 PUNCH , [alone] 1 - LITERAL "suffixed"suffix1 + LITERAL Str suffixedsuffix 1 PUNCH , [alone] 1 - LITERAL r##"raw"##1 + LITERAL StrRaw(2) raw 1 PUNCH , [alone] 1 - LITERAL 'a'1 + LITERAL Char a 1 PUNCH , [alone] 1 - LITERAL b'b'1 + LITERAL Byte b 1 PUNCH , [alone] 1 - LITERAL c"null"1"###]], - expect![[r###" + LITERAL CStr null 1"#]], + expect![[r#" SUBTREE $$ 42:2@0..100#0 42:2@0..100#0 - LITERAL 1u1642:2@0..4#0 + LITERAL Integer 1u16 42:2@0..4#0 PUNCH , [alone] 42:2@4..5#0 - LITERAL 2_u3242:2@6..11#0 + LITERAL Integer 2_u32 42:2@6..11#0 PUNCH , [alone] 42:2@11..12#0 PUNCH - [alone] 42:2@13..14#0 - LITERAL 
4i6442:2@14..18#0 + LITERAL Integer 4i64 42:2@14..18#0 PUNCH , [alone] 42:2@18..19#0 - LITERAL 3.14f3242:2@20..27#0 + LITERAL Float 3.14f32 42:2@20..27#0 PUNCH , [alone] 42:2@27..28#0 - LITERAL "hello bridge"42:2@29..43#0 + LITERAL Str hello bridge 42:2@29..43#0 PUNCH , [alone] 42:2@43..44#0 - LITERAL "suffixed"suffix42:2@45..61#0 + LITERAL Str suffixedsuffix 42:2@45..61#0 PUNCH , [alone] 42:2@61..62#0 - LITERAL r##"raw"##42:2@63..73#0 + LITERAL StrRaw(2) raw 42:2@63..73#0 PUNCH , [alone] 42:2@73..74#0 - LITERAL 'a'42:2@75..78#0 + LITERAL Char a 42:2@75..78#0 PUNCH , [alone] 42:2@78..79#0 - LITERAL b'b'42:2@80..84#0 + LITERAL Byte b 42:2@80..84#0 PUNCH , [alone] 42:2@84..85#0 - LITERAL c"null"42:2@86..93#0"###]], + LITERAL CStr null 42:2@86..93#0"#]], ); } @@ -231,20 +231,20 @@ fn test_attr_macro() { "attr_error", r#"mod m {}"#, r#"some arguments"#, - expect![[r##" + expect![[r#" SUBTREE $$ 1 1 IDENT compile_error 1 PUNCH ! [alone] 1 SUBTREE () 1 1 - LITERAL "#[attr_error(some arguments)] mod m {}"1 - PUNCH ; [alone] 1"##]], - expect![[r##" + LITERAL Str #[attr_error(some arguments)] mod m {} 1 + PUNCH ; [alone] 1"#]], + expect![[r#" SUBTREE $$ 42:2@0..100#0 42:2@0..100#0 IDENT compile_error 42:2@0..100#0 PUNCH ! [alone] 42:2@0..100#0 SUBTREE () 42:2@0..100#0 42:2@0..100#0 - LITERAL "#[attr_error(some arguments)] mod m {}"42:2@0..100#0 - PUNCH ; [alone] 42:2@0..100#0"##]], + LITERAL Str #[attr_error(some arguments)] mod m {} 42:2@0..100#0 + PUNCH ; [alone] 42:2@0..100#0"#]], ); } diff --git a/crates/salsa/tests/macros.rs b/crates/salsa/tests/macros.rs index 3d818e53c8..9b07740e7d 100644 --- a/crates/salsa/tests/macros.rs +++ b/crates/salsa/tests/macros.rs @@ -5,6 +5,7 @@ trait MyDatabase: salsa::Database { } mod another_module { + #[allow(dead_code)] pub(crate) fn another_name(_: &dyn crate::MyDatabase, (): ()) {} } diff --git a/crates/tt/src/lib.rs b/crates/tt/src/lib.rs index 369744d0e9..24fd0abada 100644 --- a/crates/tt/src/lib.rs +++ b/crates/tt/src/lib.rs @@ -12,8 +12,54 @@ use stdx::impl_from; pub use smol_str::SmolStr; pub use text_size::{TextRange, TextSize}; +#[derive(Clone, PartialEq, Debug)] +pub struct Lit { + pub kind: LitKind, + pub symbol: SmolStr, + pub suffix: Option, +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub enum IdentIsRaw { + No, + Yes, +} +impl IdentIsRaw { + pub fn yes(self) -> bool { + matches!(self, IdentIsRaw::Yes) + } + pub fn as_str(self) -> &'static str { + match self { + IdentIsRaw::No => "", + IdentIsRaw::Yes => "r#", + } + } + pub fn split_from_symbol(sym: &str) -> (Self, &str) { + if let Some(sym) = sym.strip_prefix("r#") { + (IdentIsRaw::Yes, sym) + } else { + (IdentIsRaw::No, sym) + } + } +} + +#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)] +pub enum LitKind { + Byte, + Char, + Integer, // e.g. `1`, `1u8`, `1f32` + Float, // e.g. 
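Aside (not part of the patch): the new `IdentIsRaw` type splits the `r#` marker off the identifier text instead of keeping it inline, which is why the test expectations above no longer show quotes or prefixes glued onto literal text. A stand-alone copy of the `split_from_symbol` helper from the patch, reproduced only to show the intended behaviour:

    // Stand-alone copy of the patch's `IdentIsRaw::split_from_symbol` logic.
    #[derive(Debug, PartialEq, Eq, Clone, Copy)]
    enum IdentIsRaw { No, Yes }

    fn split_from_symbol(sym: &str) -> (IdentIsRaw, &str) {
        match sym.strip_prefix("r#") {
            Some(stripped) => (IdentIsRaw::Yes, stripped),
            None => (IdentIsRaw::No, sym),
        }
    }

    fn main() {
        assert_eq!(split_from_symbol("r#fn"), (IdentIsRaw::Yes, "fn"));
        assert_eq!(split_from_symbol("ident"), (IdentIsRaw::No, "ident"));
    }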
`1.`, `1.0`, `1e3f32` + Str, + StrRaw(u8), // raw string delimited by `n` hash symbols + ByteStr, + ByteStrRaw(u8), // raw byte string delimited by `n` hash symbols + CStr, + CStrRaw(u8), + Err(()), +} + #[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub enum TokenTree { +pub enum TokenTree { Leaf(Leaf), Subtree(Subtree), } @@ -103,6 +149,15 @@ pub struct DelimSpan { pub close: S, } +impl DelimSpan { + pub fn from_single(sp: Span) -> Self { + DelimSpan { open: sp, close: sp } + } + + pub fn from_pair(open: Span, close: Span) -> Self { + DelimSpan { open, close } + } +} #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] pub struct Delimiter { pub open: S, @@ -134,8 +189,11 @@ pub enum DelimiterKind { #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct Literal { + // escaped pub text: SmolStr, pub span: S, + pub kind: LitKind, + pub suffix: Option>, } #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] @@ -145,23 +203,85 @@ pub struct Punct { pub span: S, } +/// Indicates whether a token can join with the following token to form a +/// compound token. Used for conversions to `proc_macro::Spacing`. Also used to +/// guide pretty-printing, which is where the `JointHidden` value (which isn't +/// part of `proc_macro::Spacing`) comes in useful. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum Spacing { + /// The token cannot join with the following token to form a compound + /// token. + /// + /// In token streams parsed from source code, the compiler will use `Alone` + /// for any token immediately followed by whitespace, a non-doc comment, or + /// EOF. + /// + /// When constructing token streams within the compiler, use this for each + /// token that (a) should be pretty-printed with a space after it, or (b) + /// is the last token in the stream. (In the latter case the choice of + /// spacing doesn't matter because it is never used for the last token. We + /// arbitrarily use `Alone`.) + /// + /// Converts to `proc_macro::Spacing::Alone`, and + /// `proc_macro::Spacing::Alone` converts back to this. Alone, - /// Whether the following token is joint to this one. + + /// The token can join with the following token to form a compound token. + /// + /// In token streams parsed from source code, the compiler will use `Joint` + /// for any token immediately followed by punctuation (as determined by + /// `Token::is_punct`). + /// + /// When constructing token streams within the compiler, use this for each + /// token that (a) should be pretty-printed without a space after it, and + /// (b) is followed by a punctuation token. + /// + /// Converts to `proc_macro::Spacing::Joint`, and + /// `proc_macro::Spacing::Joint` converts back to this. Joint, + + /// The token can join with the following token to form a compound token, + /// but this will not be visible at the proc macro level. (This is what the + /// `Hidden` means; see below.) + /// + /// In token streams parsed from source code, the compiler will use + /// `JointHidden` for any token immediately followed by anything not + /// covered by the `Alone` and `Joint` cases: an identifier, lifetime, + /// literal, delimiter, doc comment. + /// + /// When constructing token streams, use this for each token that (a) + /// should be pretty-printed without a space after it, and (b) is followed + /// by a non-punctuation token. + /// + /// Converts to `proc_macro::Spacing::Alone`, but + /// `proc_macro::Spacing::Alone` converts back to `token::Spacing::Alone`. 
+ /// Because of that, pretty-printing of `TokenStream`s produced by proc + /// macros is unavoidably uglier (with more whitespace between tokens) than + /// pretty-printing of `TokenStream`'s produced by other means (i.e. parsed + /// source code, internally constructed token streams, and token streams + /// produced by declarative macros). + JointHidden, } +/// Identifier or keyword. #[derive(Debug, Clone, PartialEq, Eq, Hash)] -/// Identifier or keyword. Unlike rustc, we keep "r#" prefix when it represents a raw identifier. pub struct Ident { pub text: SmolStr, pub span: S, + pub is_raw: IdentIsRaw, } impl Ident { - pub fn new(text: impl Into, span: S) -> Self { - Ident { text: text.into(), span } + pub fn new(text: impl Into + AsRef, span: S) -> Self { + let t = text.as_ref(); + // let raw_stripped = IdentIsRaw::split_from_symbol(text.as_ref()); + let raw_stripped = t.strip_prefix("r#"); + let is_raw = if raw_stripped.is_none() { IdentIsRaw::No } else { IdentIsRaw::Yes }; + let text = match raw_stripped { + Some(derawed) => derawed.into(), + None => text.into(), + }; + Ident { text, span, is_raw } } } @@ -207,22 +327,35 @@ fn print_debug_token( match tkn { TokenTree::Leaf(leaf) => match leaf { Leaf::Literal(lit) => { - write!(f, "{}LITERAL {}", align, lit.text)?; - fmt::Debug::fmt(&lit.span, f)?; + write!( + f, + "{}LITERAL {:?} {}{} {:#?}", + align, + lit.kind, + lit.text, + lit.suffix.as_ref().map(|it| &***it).unwrap_or(""), + lit.span + )?; } Leaf::Punct(punct) => { write!( f, - "{}PUNCH {} [{}] ", + "{}PUNCH {} [{}] {:#?}", align, punct.char, if punct.spacing == Spacing::Alone { "alone" } else { "joint" }, + punct.span )?; - fmt::Debug::fmt(&punct.span, f)?; } Leaf::Ident(ident) => { - write!(f, "{}IDENT {} ", align, ident.text)?; - fmt::Debug::fmt(&ident.span, f)?; + write!( + f, + "{}IDENT {}{} {:#?}", + align, + ident.is_raw.as_str(), + ident.text, + ident.span + )?; } }, TokenTree::Subtree(subtree) => { @@ -288,13 +421,52 @@ impl fmt::Display for Leaf { impl fmt::Display for Ident { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Display::fmt(&self.is_raw.as_str(), f)?; fmt::Display::fmt(&self.text, f) } } impl fmt::Display for Literal { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Display::fmt(&self.text, f) + match self.kind { + LitKind::Byte => write!(f, "b'{}'", self.text), + LitKind::Char => write!(f, "'{}'", self.text), + LitKind::Integer | LitKind::Float | LitKind::Err(_) => write!(f, "{}", self.text), + LitKind::Str => write!(f, "\"{}\"", self.text), + LitKind::ByteStr => write!(f, "b\"{}\"", self.text), + LitKind::CStr => write!(f, "c\"{}\"", self.text), + LitKind::StrRaw(num_of_hashes) => { + let num_of_hashes = num_of_hashes as usize; + write!( + f, + r#"r{0:# { + let num_of_hashes = num_of_hashes as usize; + write!( + f, + r#"br{0:# { + let num_of_hashes = num_of_hashes as usize; + write!( + f, + r#"cr{0:# Subtree { let s = match it { Leaf::Literal(it) => it.text.to_string(), Leaf::Punct(it) => it.char.to_string(), - Leaf::Ident(it) => it.text.to_string(), + Leaf::Ident(it) => format!("{}{}", it.is_raw.as_str(), it.text), }; match (it, last) { (Leaf::Ident(_), Some(&TokenTree::Leaf(Leaf::Ident(_)))) => { @@ -369,7 +541,9 @@ impl Subtree { pub fn pretty(tkns: &[TokenTree]) -> String { fn tokentree_to_text(tkn: &TokenTree) -> String { match tkn { - TokenTree::Leaf(Leaf::Ident(ident)) => ident.text.clone().into(), + TokenTree::Leaf(Leaf::Ident(ident)) => { + format!("{}{}", ident.is_raw.as_str(), ident.text) + } 
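Aside (not part of the patch): the `Display` impl for `Literal` rebuilds source-level delimiters from `LitKind`; for the raw-string kinds it re-expands the hash count around the stored, unescaped text. A hedged sketch of that rendering (the patch achieves the same with a `write!` format string; the helper name here is illustrative only):

    // Illustrative only: rebuild `r##"..."##` from the stored text and the
    // hash count carried in `LitKind::StrRaw(n)`.
    fn render_raw_str(text: &str, num_of_hashes: u8) -> String {
        let hashes = "#".repeat(num_of_hashes as usize);
        format!("r{hashes}\"{text}\"{hashes}")
    }

    fn main() {
        assert_eq!(render_raw_str("raw", 2), "r##\"raw\"##");
    }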
            TokenTree::Leaf(Leaf::Literal(literal)) => literal.text.clone().into(),
            TokenTree::Leaf(Leaf::Punct(punct)) => format!("{}", punct.char),
            TokenTree::Subtree(subtree) => {
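Aside (not part of the patch): since the `r#` prefix is no longer stored in `Ident::text`, the `to_string`/`pretty` printers re-attach it via `is_raw.as_str()`. Roughly, with a local stand-in for the crate's type:

    // Minimal sketch of how the printers re-attach the raw marker.
    #[allow(dead_code)]
    enum IdentIsRaw { No, Yes }

    impl IdentIsRaw {
        fn as_str(&self) -> &'static str {
            match self {
                IdentIsRaw::No => "",
                IdentIsRaw::Yes => "r#",
            }
        }
    }

    fn main() {
        let (is_raw, text) = (IdentIsRaw::Yes, "type");
        // `r#type` prints back with its prefix even though `text` is just "type".
        assert_eq!(format!("{}{}", is_raw.as_str(), text), "r#type");
    }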