From abe8f1ece416e5627fa1e5cd30b86110f3f7d91b Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Tue, 5 Sep 2023 19:06:15 +0200 Subject: [PATCH] Implement builtin#format_args, using rustc's format_args parser --- Cargo.lock | 1 + crates/hir-def/Cargo.toml | 1 + crates/hir-def/src/body/lower.rs | 59 +- crates/hir-def/src/body/pretty.rs | 5 + crates/hir-def/src/hir.rs | 14 +- crates/hir-def/src/hir/format_args.rs | 511 ++++++++ crates/hir-def/src/hir/format_args/parse.rs | 1023 +++++++++++++++++ crates/hir-expand/src/name.rs | 6 + crates/hir-ty/src/infer/closure.rs | 13 +- crates/hir-ty/src/infer/expr.rs | 22 +- crates/hir-ty/src/infer/mutability.rs | 12 +- crates/hir-ty/src/mir/lower.rs | 3 + crates/hir-ty/src/tests/simple.rs | 22 + crates/parser/src/grammar/expressions/atom.rs | 20 +- crates/parser/src/syntax_kind/generated.rs | 1 + .../parser/inline/ok/0207_builtin_expr.rs | 2 +- crates/syntax/rust.ungram | 8 +- crates/syntax/src/ast/generated/nodes.rs | 30 + crates/syntax/src/tests/ast_src.rs | 1 + 19 files changed, 1740 insertions(+), 14 deletions(-) create mode 100644 crates/hir-def/src/hir/format_args.rs create mode 100644 crates/hir-def/src/hir/format_args/parse.rs diff --git a/Cargo.lock b/Cargo.lock index f6900f883a..c3d7b3f79b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -541,6 +541,7 @@ dependencies = [ "mbe", "once_cell", "profile", + "ra-ap-rustc_lexer", "rustc-hash", "smallvec", "stdx", diff --git a/crates/hir-def/Cargo.toml b/crates/hir-def/Cargo.toml index 30307deb79..4640ee5140 100644 --- a/crates/hir-def/Cargo.toml +++ b/crates/hir-def/Cargo.toml @@ -33,6 +33,7 @@ triomphe.workspace = true rustc_abi = { version = "0.0.20221221", package = "hkalbasi-rustc-ap-rustc_abi", default-features = false } rustc_index = { version = "0.0.20221221", package = "hkalbasi-rustc-ap-rustc_index", default-features = false } +rustc_lexer = { version = "0.1.0", package = "ra-ap-rustc_lexer" } # local deps stdx.workspace = true diff --git a/crates/hir-def/src/body/lower.rs b/crates/hir-def/src/body/lower.rs index 038032e05d..bb0127c9ef 100644 --- a/crates/hir-def/src/body/lower.rs +++ b/crates/hir-def/src/body/lower.rs @@ -29,9 +29,13 @@ use crate::{ db::DefDatabase, expander::Expander, hir::{ - dummy_expr_id, Array, Binding, BindingAnnotation, BindingId, BindingProblems, CaptureBy, - ClosureKind, Expr, ExprId, InlineAsm, Label, LabelId, Literal, LiteralOrConst, MatchArm, - Movability, OffsetOf, Pat, PatId, RecordFieldPat, RecordLitField, Statement, + dummy_expr_id, + format_args::{ + self, FormatArgs, FormatArgument, FormatArgumentKind, FormatArgumentsCollector, + }, + Array, Binding, BindingAnnotation, BindingId, BindingProblems, CaptureBy, ClosureKind, + Expr, ExprId, InlineAsm, Label, LabelId, Literal, LiteralOrConst, MatchArm, Movability, + OffsetOf, Pat, PatId, RecordFieldPat, RecordLitField, Statement, }, item_scope::BuiltinShadowMode, lang_item::LangItem, @@ -649,15 +653,58 @@ impl ExprCollector<'_> { } ast::Expr::UnderscoreExpr(_) => self.alloc_expr(Expr::Underscore, syntax_ptr), ast::Expr::AsmExpr(e) => { - let expr = Expr::InlineAsm(InlineAsm { e: self.collect_expr_opt(e.expr()) }); - self.alloc_expr(expr, syntax_ptr) + let e = self.collect_expr_opt(e.expr()); + self.alloc_expr(Expr::InlineAsm(InlineAsm { e }), syntax_ptr) } ast::Expr::OffsetOfExpr(e) => { let container = Interned::new(TypeRef::from_ast_opt(&self.ctx(), e.ty())); let fields = e.fields().map(|it| it.as_name()).collect(); self.alloc_expr(Expr::OffsetOf(OffsetOf { container, fields }), syntax_ptr) } - ast::Expr::FormatArgsExpr(_) => self.missing_expr(), + ast::Expr::FormatArgsExpr(f) => { + let mut args = FormatArgumentsCollector::new(); + f.args().for_each(|arg| { + args.add(FormatArgument { + kind: match arg.name() { + Some(name) => FormatArgumentKind::Named(name.as_name()), + None => FormatArgumentKind::Normal, + }, + expr: self.collect_expr_opt(arg.expr()), + }); + }); + let template = f.template(); + let fmt_snippet = template.as_ref().map(ToString::to_string); + let expr = self.collect_expr_opt(f.template()); + if let Expr::Literal(Literal::String(_)) = self.body[expr] { + let source = self.source_map.expr_map_back[expr].clone(); + let is_direct_literal = source.file_id == self.expander.current_file_id; + if let ast::Expr::Literal(l) = + source.value.to_node(&self.db.parse_or_expand(source.file_id)) + { + if let ast::LiteralKind::String(s) = l.kind() { + return Some(self.alloc_expr( + Expr::FormatArgs(format_args::parse( + expr, + &s, + fmt_snippet, + args, + is_direct_literal, + )), + syntax_ptr, + )); + } + } + } + + self.alloc_expr( + Expr::FormatArgs(FormatArgs { + template_expr: expr, + template: Default::default(), + arguments: args.finish(), + }), + syntax_ptr, + ) + } }) } diff --git a/crates/hir-def/src/body/pretty.rs b/crates/hir-def/src/body/pretty.rs index 602a7983c7..b67ed2fb38 100644 --- a/crates/hir-def/src/body/pretty.rs +++ b/crates/hir-def/src/body/pretty.rs @@ -156,6 +156,11 @@ impl Printer<'_> { Expr::Missing => w!(self, "�"), Expr::Underscore => w!(self, "_"), Expr::InlineAsm(_) => w!(self, "builtin#asm(_)"), + Expr::FormatArgs(_fmt_args) => { + w!(self, "builtin#format_args("); + // FIXME + w!(self, ")"); + } Expr::OffsetOf(offset_of) => { w!(self, "builtin#offset_of("); self.print_type_ref(&offset_of.container); diff --git a/crates/hir-def/src/hir.rs b/crates/hir-def/src/hir.rs index 1c86af456d..98220de388 100644 --- a/crates/hir-def/src/hir.rs +++ b/crates/hir-def/src/hir.rs @@ -13,6 +13,7 @@ //! See also a neighboring `body` module. pub mod type_ref; +pub mod format_args; use std::fmt; @@ -24,6 +25,7 @@ use syntax::ast; use crate::{ builtin_type::{BuiltinFloat, BuiltinInt, BuiltinUint}, + hir::format_args::{FormatArgs, FormatArgumentKind}, path::{GenericArgs, Path}, type_ref::{Mutability, Rawness, TypeRef}, BlockId, ConstBlockId, @@ -117,7 +119,6 @@ impl From for Literal { fn from(ast_lit_kind: ast::LiteralKind) -> Self { use ast::LiteralKind; match ast_lit_kind { - // FIXME: these should have actual values filled in, but unsure on perf impact LiteralKind::IntNumber(lit) => { if let builtin @ Some(_) = lit.suffix().and_then(BuiltinFloat::from_suffix) { Literal::Float( @@ -283,6 +284,7 @@ pub enum Expr { Underscore, OffsetOf(OffsetOf), InlineAsm(InlineAsm), + FormatArgs(FormatArgs), } #[derive(Debug, Clone, PartialEq, Eq)] @@ -355,7 +357,15 @@ impl Expr { match self { Expr::Missing => {} Expr::Path(_) | Expr::OffsetOf(_) => {} - Expr::InlineAsm(e) => f(e.e), + Expr::InlineAsm(it) => f(it.e), + Expr::FormatArgs(it) => { + f(it.template_expr); + it.arguments + .arguments + .iter() + .filter(|it| !matches!(it.kind, FormatArgumentKind::Captured(_))) + .for_each(|it| f(it.expr)); + } Expr::If { condition, then_branch, else_branch } => { f(*condition); f(*then_branch); diff --git a/crates/hir-def/src/hir/format_args.rs b/crates/hir-def/src/hir/format_args.rs new file mode 100644 index 0000000000..8fa8b7246b --- /dev/null +++ b/crates/hir-def/src/hir/format_args.rs @@ -0,0 +1,511 @@ +use std::mem; + +use hir_expand::name::Name; +use syntax::{ + ast::{self, IsString}, + AstToken, SmolStr, TextRange, +}; + +use crate::hir::{dummy_expr_id, ExprId}; + +mod parse; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct FormatArgs { + pub template_expr: ExprId, + pub template: Box<[FormatArgsPiece]>, + pub arguments: FormatArguments, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct FormatArguments { + pub arguments: Box<[FormatArgument]>, + pub num_unnamed_args: usize, + pub num_explicit_args: usize, + pub names: Box<[(Name, usize)]>, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum FormatArgsPiece { + Literal(Box), + Placeholder(FormatPlaceholder), +} + +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct FormatPlaceholder { + /// Index into [`FormatArgs::arguments`]. + pub argument: FormatArgPosition, + /// The span inside the format string for the full `{…}` placeholder. + pub span: Option, + /// `{}`, `{:?}`, or `{:x}`, etc. + pub format_trait: FormatTrait, + /// `{}` or `{:.5}` or `{:-^20}`, etc. + pub format_options: FormatOptions, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct FormatArgPosition { + /// Which argument this position refers to (Ok), + /// or would've referred to if it existed (Err). + pub index: Result, + /// What kind of position this is. See [`FormatArgPositionKind`]. + pub kind: FormatArgPositionKind, + /// The span of the name or number. + pub span: Option, +} + +#[derive(Copy, Debug, Clone, PartialEq, Eq)] +pub enum FormatArgPositionKind { + /// `{}` or `{:.*}` + Implicit, + /// `{1}` or `{:1$}` or `{:.1$}` + Number, + /// `{a}` or `{:a$}` or `{:.a$}` + Named, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum FormatTrait { + /// `{}` + Display, + /// `{:?}` + Debug, + /// `{:e}` + LowerExp, + /// `{:E}` + UpperExp, + /// `{:o}` + Octal, + /// `{:p}` + Pointer, + /// `{:b}` + Binary, + /// `{:x}` + LowerHex, + /// `{:X}` + UpperHex, +} + +#[derive(Clone, Default, Debug, PartialEq, Eq)] +pub struct FormatOptions { + /// The width. E.g. `{:5}` or `{:width$}`. + pub width: Option, + /// The precision. E.g. `{:.5}` or `{:.precision$}`. + pub precision: Option, + /// The alignment. E.g. `{:>}` or `{:<}` or `{:^}`. + pub alignment: Option, + /// The fill character. E.g. the `.` in `{:.>10}`. + pub fill: Option, + /// The `+` or `-` flag. + pub sign: Option, + /// The `#` flag. + pub alternate: bool, + /// The `0` flag. E.g. the `0` in `{:02x}`. + pub zero_pad: bool, + /// The `x` or `X` flag (for `Debug` only). E.g. the `x` in `{:x?}`. + pub debug_hex: Option, +} +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum FormatSign { + /// The `+` flag. + Plus, + /// The `-` flag. + Minus, +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum FormatDebugHex { + /// The `x` flag in `{:x?}`. + Lower, + /// The `X` flag in `{:X?}`. + Upper, +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum FormatAlignment { + /// `{:<}` + Left, + /// `{:>}` + Right, + /// `{:^}` + Center, +} + +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum FormatCount { + /// `{:5}` or `{:.5}` + Literal(usize), + /// `{:.*}`, `{:.5$}`, or `{:a$}`, etc. + Argument(FormatArgPosition), +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct FormatArgument { + pub kind: FormatArgumentKind, + pub expr: ExprId, +} + +#[derive(Clone, PartialEq, Eq, Debug)] +pub enum FormatArgumentKind { + /// `format_args(…, arg)` + Normal, + /// `format_args(…, arg = 1)` + Named(Name), + /// `format_args("… {arg} …")` + Captured(Name), +} + +// Only used in parse_args and report_invalid_references, +// to indicate how a referred argument was used. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +enum PositionUsedAs { + Placeholder(Option), + Precision, + Width, +} +use PositionUsedAs::*; + +pub(crate) fn parse( + expr: ExprId, + s: &ast::String, + fmt_snippet: Option, + mut args: FormatArgumentsCollector, + is_direct_literal: bool, +) -> FormatArgs { + let text = s.text(); + let str_style = match s.quote_offsets() { + Some(offsets) => { + let raw = u32::from(offsets.quotes.0.len()) - 1; + (raw != 0).then_some(raw as usize) + } + None => None, + }; + let mut parser = + parse::Parser::new(text, str_style, fmt_snippet, false, parse::ParseMode::Format); + + let mut pieces = Vec::new(); + while let Some(piece) = parser.next() { + if !parser.errors.is_empty() { + break; + } else { + pieces.push(piece); + } + } + let is_source_literal = parser.is_source_literal; + if !parser.errors.is_empty() { + // FIXME: Diagnose + return FormatArgs { + template_expr: expr, + template: Default::default(), + arguments: args.finish(), + }; + } + + let to_span = |inner_span: parse::InnerSpan| { + is_source_literal.then(|| { + TextRange::new(inner_span.start.try_into().unwrap(), inner_span.end.try_into().unwrap()) + }) + }; + + let mut used = vec![false; args.explicit_args().len()]; + let mut invalid_refs = Vec::new(); + let mut numeric_refences_to_named_arg = Vec::new(); + + enum ArgRef<'a> { + Index(usize), + Name(&'a str, Option), + } + let mut lookup_arg = |arg: ArgRef<'_>, + span: Option, + used_as: PositionUsedAs, + kind: FormatArgPositionKind| + -> FormatArgPosition { + let index = match arg { + ArgRef::Index(index) => { + if let Some(arg) = args.by_index(index) { + used[index] = true; + if arg.kind.ident().is_some() { + // This was a named argument, but it was used as a positional argument. + numeric_refences_to_named_arg.push((index, span, used_as)); + } + Ok(index) + } else { + // Doesn't exist as an explicit argument. + invalid_refs.push((index, span, used_as, kind)); + Err(index) + } + } + ArgRef::Name(name, _span) => { + let name = Name::new_text_dont_use(SmolStr::new(name)); + if let Some((index, _)) = args.by_name(&name) { + // Name found in `args`, so we resolve it to its index. + if index < args.explicit_args().len() { + // Mark it as used, if it was an explicit argument. + used[index] = true; + } + Ok(index) + } else { + // Name not found in `args`, so we add it as an implicitly captured argument. + if !is_direct_literal { + // For the moment capturing variables from format strings expanded from macros is + // disabled (see RFC #2795) + // FIXME: Diagnose + } + Ok(args.add(FormatArgument { + kind: FormatArgumentKind::Captured(name), + // FIXME: This is problematic, we might want to synthesize a dummy + // expression proper and/or desugar these. + expr: dummy_expr_id(), + })) + } + } + }; + FormatArgPosition { index, kind, span } + }; + + let mut template = Vec::new(); + let mut unfinished_literal = String::new(); + let mut placeholder_index = 0; + + for piece in pieces { + match piece { + parse::Piece::String(s) => { + unfinished_literal.push_str(s); + } + parse::Piece::NextArgument(arg) => { + let parse::Argument { position, position_span, format } = *arg; + if !unfinished_literal.is_empty() { + template.push(FormatArgsPiece::Literal( + mem::take(&mut unfinished_literal).into_boxed_str(), + )); + } + + let span = parser.arg_places.get(placeholder_index).and_then(|&s| to_span(s)); + placeholder_index += 1; + + let position_span = to_span(position_span); + let argument = match position { + parse::ArgumentImplicitlyIs(i) => lookup_arg( + ArgRef::Index(i), + position_span, + Placeholder(span), + FormatArgPositionKind::Implicit, + ), + parse::ArgumentIs(i) => lookup_arg( + ArgRef::Index(i), + position_span, + Placeholder(span), + FormatArgPositionKind::Number, + ), + parse::ArgumentNamed(name) => lookup_arg( + ArgRef::Name(name, position_span), + position_span, + Placeholder(span), + FormatArgPositionKind::Named, + ), + }; + + let alignment = match format.align { + parse::AlignUnknown => None, + parse::AlignLeft => Some(FormatAlignment::Left), + parse::AlignRight => Some(FormatAlignment::Right), + parse::AlignCenter => Some(FormatAlignment::Center), + }; + + let format_trait = match format.ty { + "" => FormatTrait::Display, + "?" => FormatTrait::Debug, + "e" => FormatTrait::LowerExp, + "E" => FormatTrait::UpperExp, + "o" => FormatTrait::Octal, + "p" => FormatTrait::Pointer, + "b" => FormatTrait::Binary, + "x" => FormatTrait::LowerHex, + "X" => FormatTrait::UpperHex, + _ => { + // FIXME: Diagnose + FormatTrait::Display + } + }; + + let precision_span = format.precision_span.and_then(to_span); + let precision = match format.precision { + parse::CountIs(n) => Some(FormatCount::Literal(n)), + parse::CountIsName(name, name_span) => Some(FormatCount::Argument(lookup_arg( + ArgRef::Name(name, to_span(name_span)), + precision_span, + Precision, + FormatArgPositionKind::Named, + ))), + parse::CountIsParam(i) => Some(FormatCount::Argument(lookup_arg( + ArgRef::Index(i), + precision_span, + Precision, + FormatArgPositionKind::Number, + ))), + parse::CountIsStar(i) => Some(FormatCount::Argument(lookup_arg( + ArgRef::Index(i), + precision_span, + Precision, + FormatArgPositionKind::Implicit, + ))), + parse::CountImplied => None, + }; + + let width_span = format.width_span.and_then(to_span); + let width = match format.width { + parse::CountIs(n) => Some(FormatCount::Literal(n)), + parse::CountIsName(name, name_span) => Some(FormatCount::Argument(lookup_arg( + ArgRef::Name(name, to_span(name_span)), + width_span, + Width, + FormatArgPositionKind::Named, + ))), + parse::CountIsParam(i) => Some(FormatCount::Argument(lookup_arg( + ArgRef::Index(i), + width_span, + Width, + FormatArgPositionKind::Number, + ))), + parse::CountIsStar(_) => unreachable!(), + parse::CountImplied => None, + }; + + template.push(FormatArgsPiece::Placeholder(FormatPlaceholder { + argument, + span, + format_trait, + format_options: FormatOptions { + fill: format.fill, + alignment, + sign: format.sign.map(|s| match s { + parse::Sign::Plus => FormatSign::Plus, + parse::Sign::Minus => FormatSign::Minus, + }), + alternate: format.alternate, + zero_pad: format.zero_pad, + debug_hex: format.debug_hex.map(|s| match s { + parse::DebugHex::Lower => FormatDebugHex::Lower, + parse::DebugHex::Upper => FormatDebugHex::Upper, + }), + precision, + width, + }, + })); + } + } + } + + if !unfinished_literal.is_empty() { + template.push(FormatArgsPiece::Literal(unfinished_literal.into_boxed_str())); + } + + if !invalid_refs.is_empty() { + // FIXME: Diagnose + } + + let unused = used + .iter() + .enumerate() + .filter(|&(_, used)| !used) + .map(|(i, _)| { + let named = matches!(args.explicit_args()[i].kind, FormatArgumentKind::Named(_)); + (args.explicit_args()[i].expr, named) + }) + .collect::>(); + + if !unused.is_empty() { + // FIXME: Diagnose + } + + FormatArgs { + template_expr: expr, + template: template.into_boxed_slice(), + arguments: args.finish(), + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct FormatArgumentsCollector { + arguments: Vec, + num_unnamed_args: usize, + num_explicit_args: usize, + names: Vec<(Name, usize)>, +} + +impl FormatArgumentsCollector { + pub(crate) fn finish(self) -> FormatArguments { + FormatArguments { + arguments: self.arguments.into_boxed_slice(), + num_unnamed_args: self.num_unnamed_args, + num_explicit_args: self.num_explicit_args, + names: self.names.into_boxed_slice(), + } + } + + pub fn new() -> Self { + Self { arguments: vec![], names: vec![], num_unnamed_args: 0, num_explicit_args: 0 } + } + + pub fn add(&mut self, arg: FormatArgument) -> usize { + let index = self.arguments.len(); + if let Some(name) = arg.kind.ident() { + self.names.push((name.clone(), index)); + } else if self.names.is_empty() { + // Only count the unnamed args before the first named arg. + // (Any later ones are errors.) + self.num_unnamed_args += 1; + } + if !matches!(arg.kind, FormatArgumentKind::Captured(..)) { + // This is an explicit argument. + // Make sure that all arguments so far are explicit. + assert_eq!( + self.num_explicit_args, + self.arguments.len(), + "captured arguments must be added last" + ); + self.num_explicit_args += 1; + } + self.arguments.push(arg); + index + } + + pub fn by_name(&self, name: &Name) -> Option<(usize, &FormatArgument)> { + let &(_, i) = self.names.iter().find(|(n, _)| n == name)?; + Some((i, &self.arguments[i])) + } + + pub fn by_index(&self, i: usize) -> Option<&FormatArgument> { + (i < self.num_explicit_args).then(|| &self.arguments[i]) + } + + pub fn unnamed_args(&self) -> &[FormatArgument] { + &self.arguments[..self.num_unnamed_args] + } + + pub fn named_args(&self) -> &[FormatArgument] { + &self.arguments[self.num_unnamed_args..self.num_explicit_args] + } + + pub fn explicit_args(&self) -> &[FormatArgument] { + &self.arguments[..self.num_explicit_args] + } + + pub fn all_args(&self) -> &[FormatArgument] { + &self.arguments[..] + } + + pub fn all_args_mut(&mut self) -> &mut Vec { + &mut self.arguments + } +} + +impl FormatArgumentKind { + pub fn ident(&self) -> Option<&Name> { + match self { + Self::Normal => None, + Self::Named(id) => Some(id), + Self::Captured(id) => Some(id), + } + } +} diff --git a/crates/hir-def/src/hir/format_args/parse.rs b/crates/hir-def/src/hir/format_args/parse.rs new file mode 100644 index 0000000000..22efa3883d --- /dev/null +++ b/crates/hir-def/src/hir/format_args/parse.rs @@ -0,0 +1,1023 @@ +//! Macro support for format strings +//! +//! These structures are used when parsing format strings for the compiler. +//! Parsing does not happen at runtime: structures of `std::fmt::rt` are +//! generated instead. + +// This is a copy of +// https://github.com/Veykril/rust/blob/b89d7d6882532686fd90a89cec1a0fd386f0ade3/compiler/rustc_parse_format/src/lib.rs#L999-L1000 +// with the dependency of rustc-data-structures stripped out. + +// #![doc( +// html_root_url = "https://doc.rust-lang.org/nightly/nightly-rustc/", +// html_playground_url = "https://play.rust-lang.org/", +// test(attr(deny(warnings))) +// )] +// #![deny(rustc::untranslatable_diagnostic)] +// #![deny(rustc::diagnostic_outside_of_impl)] +// We want to be able to build this crate with a stable compiler, so no +// `#![feature]` attributes should be added. +#![allow(dead_code, unreachable_pub)] + +use rustc_lexer::unescape; +pub use Alignment::*; +pub use Count::*; +pub use Piece::*; +pub use Position::*; + +use std::iter; +use std::str; +use std::string; + +// Note: copied from rustc_span +/// Range inside of a `Span` used for diagnostics when we only have access to relative positions. +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +pub struct InnerSpan { + pub start: usize, + pub end: usize, +} + +impl InnerSpan { + pub fn new(start: usize, end: usize) -> InnerSpan { + InnerSpan { start, end } + } +} + +/// The location and before/after width of a character whose width has changed from its source code +/// representation +#[derive(Copy, Clone, PartialEq, Eq)] +pub struct InnerWidthMapping { + /// Index of the character in the source + pub position: usize, + /// The inner width in characters + pub before: usize, + /// The transformed width in characters + pub after: usize, +} + +impl InnerWidthMapping { + pub fn new(position: usize, before: usize, after: usize) -> InnerWidthMapping { + InnerWidthMapping { position, before, after } + } +} + +/// Whether the input string is a literal. If yes, it contains the inner width mappings. +#[derive(Clone, PartialEq, Eq)] +enum InputStringKind { + NotALiteral, + Literal { width_mappings: Vec }, +} + +/// The type of format string that we are parsing. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub enum ParseMode { + /// A normal format string as per `format_args!`. + Format, + /// An inline assembly template string for `asm!`. + InlineAsm, +} + +#[derive(Copy, Clone)] +struct InnerOffset(usize); + +impl InnerOffset { + fn to(self, end: InnerOffset) -> InnerSpan { + InnerSpan::new(self.0, end.0) + } +} + +/// A piece is a portion of the format string which represents the next part +/// to emit. These are emitted as a stream by the `Parser` class. +#[derive(Clone, Debug, PartialEq)] +pub enum Piece<'a> { + /// A literal string which should directly be emitted + String(&'a str), + /// This describes that formatting should process the next argument (as + /// specified inside) for emission. + NextArgument(Box>), +} + +/// Representation of an argument specification. +#[derive(Copy, Clone, Debug, PartialEq)] +pub struct Argument<'a> { + /// Where to find this argument + pub position: Position<'a>, + /// The span of the position indicator. Includes any whitespace in implicit + /// positions (`{ }`). + pub position_span: InnerSpan, + /// How to format the argument + pub format: FormatSpec<'a>, +} + +/// Specification for the formatting of an argument in the format string. +#[derive(Copy, Clone, Debug, PartialEq)] +pub struct FormatSpec<'a> { + /// Optionally specified character to fill alignment with. + pub fill: Option, + /// Span of the optionally specified fill character. + pub fill_span: Option, + /// Optionally specified alignment. + pub align: Alignment, + /// The `+` or `-` flag. + pub sign: Option, + /// The `#` flag. + pub alternate: bool, + /// The `0` flag. + pub zero_pad: bool, + /// The `x` or `X` flag. (Only for `Debug`.) + pub debug_hex: Option, + /// The integer precision to use. + pub precision: Count<'a>, + /// The span of the precision formatting flag (for diagnostics). + pub precision_span: Option, + /// The string width requested for the resulting format. + pub width: Count<'a>, + /// The span of the width formatting flag (for diagnostics). + pub width_span: Option, + /// The descriptor string representing the name of the format desired for + /// this argument, this can be empty or any number of characters, although + /// it is required to be one word. + pub ty: &'a str, + /// The span of the descriptor string (for diagnostics). + pub ty_span: Option, +} + +/// Enum describing where an argument for a format can be located. +#[derive(Copy, Clone, Debug, PartialEq)] +pub enum Position<'a> { + /// The argument is implied to be located at an index + ArgumentImplicitlyIs(usize), + /// The argument is located at a specific index given in the format, + ArgumentIs(usize), + /// The argument has a name. + ArgumentNamed(&'a str), +} + +impl Position<'_> { + pub fn index(&self) -> Option { + match self { + ArgumentIs(i, ..) | ArgumentImplicitlyIs(i) => Some(*i), + _ => None, + } + } +} + +/// Enum of alignments which are supported. +#[derive(Copy, Clone, Debug, PartialEq)] +pub enum Alignment { + /// The value will be aligned to the left. + AlignLeft, + /// The value will be aligned to the right. + AlignRight, + /// The value will be aligned in the center. + AlignCenter, + /// The value will take on a default alignment. + AlignUnknown, +} + +/// Enum for the sign flags. +#[derive(Copy, Clone, Debug, PartialEq)] +pub enum Sign { + /// The `+` flag. + Plus, + /// The `-` flag. + Minus, +} + +/// Enum for the debug hex flags. +#[derive(Copy, Clone, Debug, PartialEq)] +pub enum DebugHex { + /// The `x` flag in `{:x?}`. + Lower, + /// The `X` flag in `{:X?}`. + Upper, +} + +/// A count is used for the precision and width parameters of an integer, and +/// can reference either an argument or a literal integer. +#[derive(Copy, Clone, Debug, PartialEq)] +pub enum Count<'a> { + /// The count is specified explicitly. + CountIs(usize), + /// The count is specified by the argument with the given name. + CountIsName(&'a str, InnerSpan), + /// The count is specified by the argument at the given index. + CountIsParam(usize), + /// The count is specified by a star (like in `{:.*}`) that refers to the argument at the given index. + CountIsStar(usize), + /// The count is implied and cannot be explicitly specified. + CountImplied, +} + +pub struct ParseError { + pub description: string::String, + pub note: Option, + pub label: string::String, + pub span: InnerSpan, + pub secondary_label: Option<(string::String, InnerSpan)>, + pub should_be_replaced_with_positional_argument: bool, +} + +/// The parser structure for interpreting the input format string. This is +/// modeled as an iterator over `Piece` structures to form a stream of tokens +/// being output. +/// +/// This is a recursive-descent parser for the sake of simplicity, and if +/// necessary there's probably lots of room for improvement performance-wise. +pub struct Parser<'a> { + mode: ParseMode, + input: &'a str, + cur: iter::Peekable>, + /// Error messages accumulated during parsing + pub errors: Vec, + /// Current position of implicit positional argument pointer + pub curarg: usize, + /// `Some(raw count)` when the string is "raw", used to position spans correctly + style: Option, + /// Start and end byte offset of every successfully parsed argument + pub arg_places: Vec, + /// Characters whose length has been changed from their in-code representation + width_map: Vec, + /// Span of the last opening brace seen, used for error reporting + last_opening_brace: Option, + /// Whether the source string is comes from `println!` as opposed to `format!` or `print!` + append_newline: bool, + /// Whether this formatting string was written directly in the source. This controls whether we + /// can use spans to refer into it and give better error messages. + /// N.B: This does _not_ control whether implicit argument captures can be used. + pub is_source_literal: bool, + /// Start position of the current line. + cur_line_start: usize, + /// Start and end byte offset of every line of the format string. Excludes + /// newline characters and leading whitespace. + pub line_spans: Vec, +} + +impl<'a> Iterator for Parser<'a> { + type Item = Piece<'a>; + + fn next(&mut self) -> Option> { + if let Some(&(pos, c)) = self.cur.peek() { + match c { + '{' => { + let curr_last_brace = self.last_opening_brace; + let byte_pos = self.to_span_index(pos); + let lbrace_end = InnerOffset(byte_pos.0 + self.to_span_width(pos)); + self.last_opening_brace = Some(byte_pos.to(lbrace_end)); + self.cur.next(); + if self.consume('{') { + self.last_opening_brace = curr_last_brace; + + Some(String(self.string(pos + 1))) + } else { + let arg = self.argument(lbrace_end); + if let Some(rbrace_pos) = self.consume_closing_brace(&arg) { + if self.is_source_literal { + let lbrace_byte_pos = self.to_span_index(pos); + let rbrace_byte_pos = self.to_span_index(rbrace_pos); + + let width = self.to_span_width(rbrace_pos); + + self.arg_places.push( + lbrace_byte_pos.to(InnerOffset(rbrace_byte_pos.0 + width)), + ); + } + } else { + if let Some(&(_, maybe)) = self.cur.peek() { + if maybe == '?' { + self.suggest_format(); + } else { + self.suggest_positional_arg_instead_of_captured_arg(arg); + } + } + } + Some(NextArgument(Box::new(arg))) + } + } + '}' => { + self.cur.next(); + if self.consume('}') { + Some(String(self.string(pos + 1))) + } else { + let err_pos = self.to_span_index(pos); + self.err_with_note( + "unmatched `}` found", + "unmatched `}`", + "if you intended to print `}`, you can escape it using `}}`", + err_pos.to(err_pos), + ); + None + } + } + _ => Some(String(self.string(pos))), + } + } else { + if self.is_source_literal { + let span = self.span(self.cur_line_start, self.input.len()); + if self.line_spans.last() != Some(&span) { + self.line_spans.push(span); + } + } + None + } + } +} + +impl<'a> Parser<'a> { + /// Creates a new parser for the given format string + pub fn new( + s: &'a str, + style: Option, + snippet: Option, + append_newline: bool, + mode: ParseMode, + ) -> Parser<'a> { + let input_string_kind = find_width_map_from_snippet(s, snippet, style); + let (width_map, is_source_literal) = match input_string_kind { + InputStringKind::Literal { width_mappings } => (width_mappings, true), + InputStringKind::NotALiteral => (Vec::new(), false), + }; + + Parser { + mode, + input: s, + cur: s.char_indices().peekable(), + errors: vec![], + curarg: 0, + style, + arg_places: vec![], + width_map, + last_opening_brace: None, + append_newline, + is_source_literal, + cur_line_start: 0, + line_spans: vec![], + } + } + + /// Notifies of an error. The message doesn't actually need to be of type + /// String, but I think it does when this eventually uses conditions so it + /// might as well start using it now. + fn err, S2: Into>( + &mut self, + description: S1, + label: S2, + span: InnerSpan, + ) { + self.errors.push(ParseError { + description: description.into(), + note: None, + label: label.into(), + span, + secondary_label: None, + should_be_replaced_with_positional_argument: false, + }); + } + + /// Notifies of an error. The message doesn't actually need to be of type + /// String, but I think it does when this eventually uses conditions so it + /// might as well start using it now. + fn err_with_note< + S1: Into, + S2: Into, + S3: Into, + >( + &mut self, + description: S1, + label: S2, + note: S3, + span: InnerSpan, + ) { + self.errors.push(ParseError { + description: description.into(), + note: Some(note.into()), + label: label.into(), + span, + secondary_label: None, + should_be_replaced_with_positional_argument: false, + }); + } + + /// Optionally consumes the specified character. If the character is not at + /// the current position, then the current iterator isn't moved and `false` is + /// returned, otherwise the character is consumed and `true` is returned. + fn consume(&mut self, c: char) -> bool { + self.consume_pos(c).is_some() + } + + /// Optionally consumes the specified character. If the character is not at + /// the current position, then the current iterator isn't moved and `None` is + /// returned, otherwise the character is consumed and the current position is + /// returned. + fn consume_pos(&mut self, c: char) -> Option { + if let Some(&(pos, maybe)) = self.cur.peek() { + if c == maybe { + self.cur.next(); + return Some(pos); + } + } + None + } + + fn remap_pos(&self, mut pos: usize) -> InnerOffset { + for width in &self.width_map { + if pos > width.position { + pos += width.before - width.after; + } else if pos == width.position && width.after == 0 { + pos += width.before; + } else { + break; + } + } + + InnerOffset(pos) + } + + fn to_span_index(&self, pos: usize) -> InnerOffset { + // This handles the raw string case, the raw argument is the number of # + // in r###"..."### (we need to add one because of the `r`). + let raw = self.style.map_or(0, |raw| raw + 1); + let pos = self.remap_pos(pos); + InnerOffset(raw + pos.0 + 1) + } + + fn to_span_width(&self, pos: usize) -> usize { + let pos = self.remap_pos(pos); + match self.width_map.iter().find(|w| w.position == pos.0) { + Some(w) => w.before, + None => 1, + } + } + + fn span(&self, start_pos: usize, end_pos: usize) -> InnerSpan { + let start = self.to_span_index(start_pos); + let end = self.to_span_index(end_pos); + start.to(end) + } + + /// Forces consumption of the specified character. If the character is not + /// found, an error is emitted. + fn consume_closing_brace(&mut self, arg: &Argument<'_>) -> Option { + self.ws(); + + let pos; + let description; + + if let Some(&(peek_pos, maybe)) = self.cur.peek() { + if maybe == '}' { + self.cur.next(); + return Some(peek_pos); + } + + pos = peek_pos; + description = format!("expected `'}}'`, found `{maybe:?}`"); + } else { + description = "expected `'}'` but string was terminated".to_owned(); + // point at closing `"` + pos = self.input.len() - if self.append_newline { 1 } else { 0 }; + } + + let pos = self.to_span_index(pos); + + let label = "expected `'}'`".to_owned(); + let (note, secondary_label) = if arg.format.fill == Some('}') { + ( + Some("the character `'}'` is interpreted as a fill character because of the `:` that precedes it".to_owned()), + arg.format.fill_span.map(|sp| ("this is not interpreted as a formatting closing brace".to_owned(), sp)), + ) + } else { + ( + Some("if you intended to print `{`, you can escape it using `{{`".to_owned()), + self.last_opening_brace.map(|sp| ("because of this opening brace".to_owned(), sp)), + ) + }; + + self.errors.push(ParseError { + description, + note, + label, + span: pos.to(pos), + secondary_label, + should_be_replaced_with_positional_argument: false, + }); + + None + } + + /// Consumes all whitespace characters until the first non-whitespace character + fn ws(&mut self) { + while let Some(&(_, c)) = self.cur.peek() { + if c.is_whitespace() { + self.cur.next(); + } else { + break; + } + } + } + + /// Parses all of a string which is to be considered a "raw literal" in a + /// format string. This is everything outside of the braces. + fn string(&mut self, start: usize) -> &'a str { + // we may not consume the character, peek the iterator + while let Some(&(pos, c)) = self.cur.peek() { + match c { + '{' | '}' => { + return &self.input[start..pos]; + } + '\n' if self.is_source_literal => { + self.line_spans.push(self.span(self.cur_line_start, pos)); + self.cur_line_start = pos + 1; + self.cur.next(); + } + _ => { + if self.is_source_literal && pos == self.cur_line_start && c.is_whitespace() { + self.cur_line_start = pos + c.len_utf8(); + } + self.cur.next(); + } + } + } + &self.input[start..self.input.len()] + } + + /// Parses an `Argument` structure, or what's contained within braces inside the format string. + fn argument(&mut self, start: InnerOffset) -> Argument<'a> { + let pos = self.position(); + + let end = self + .cur + .clone() + .find(|(_, ch)| !ch.is_whitespace()) + .map_or(start, |(end, _)| self.to_span_index(end)); + let position_span = start.to(end); + + let format = match self.mode { + ParseMode::Format => self.format(), + ParseMode::InlineAsm => self.inline_asm(), + }; + + // Resolve position after parsing format spec. + let pos = match pos { + Some(position) => position, + None => { + let i = self.curarg; + self.curarg += 1; + ArgumentImplicitlyIs(i) + } + }; + + Argument { position: pos, position_span, format } + } + + /// Parses a positional argument for a format. This could either be an + /// integer index of an argument, a named argument, or a blank string. + /// Returns `Some(parsed_position)` if the position is not implicitly + /// consuming a macro argument, `None` if it's the case. + fn position(&mut self) -> Option> { + if let Some(i) = self.integer() { + Some(ArgumentIs(i)) + } else { + match self.cur.peek() { + Some(&(_, c)) if rustc_lexer::is_id_start(c) => Some(ArgumentNamed(self.word())), + + // This is an `ArgumentNext`. + // Record the fact and do the resolution after parsing the + // format spec, to make things like `{:.*}` work. + _ => None, + } + } + } + + fn current_pos(&mut self) -> usize { + if let Some(&(pos, _)) = self.cur.peek() { + pos + } else { + self.input.len() + } + } + + /// Parses a format specifier at the current position, returning all of the + /// relevant information in the `FormatSpec` struct. + fn format(&mut self) -> FormatSpec<'a> { + let mut spec = FormatSpec { + fill: None, + fill_span: None, + align: AlignUnknown, + sign: None, + alternate: false, + zero_pad: false, + debug_hex: None, + precision: CountImplied, + precision_span: None, + width: CountImplied, + width_span: None, + ty: &self.input[..0], + ty_span: None, + }; + if !self.consume(':') { + return spec; + } + + // fill character + if let Some(&(idx, c)) = self.cur.peek() { + if let Some((_, '>' | '<' | '^')) = self.cur.clone().nth(1) { + spec.fill = Some(c); + spec.fill_span = Some(self.span(idx, idx + 1)); + self.cur.next(); + } + } + // Alignment + if self.consume('<') { + spec.align = AlignLeft; + } else if self.consume('>') { + spec.align = AlignRight; + } else if self.consume('^') { + spec.align = AlignCenter; + } + // Sign flags + if self.consume('+') { + spec.sign = Some(Sign::Plus); + } else if self.consume('-') { + spec.sign = Some(Sign::Minus); + } + // Alternate marker + if self.consume('#') { + spec.alternate = true; + } + // Width and precision + let mut havewidth = false; + + if self.consume('0') { + // small ambiguity with '0$' as a format string. In theory this is a + // '0' flag and then an ill-formatted format string with just a '$' + // and no count, but this is better if we instead interpret this as + // no '0' flag and '0$' as the width instead. + if let Some(end) = self.consume_pos('$') { + spec.width = CountIsParam(0); + spec.width_span = Some(self.span(end - 1, end + 1)); + havewidth = true; + } else { + spec.zero_pad = true; + } + } + + if !havewidth { + let start = self.current_pos(); + spec.width = self.count(start); + if spec.width != CountImplied { + let end = self.current_pos(); + spec.width_span = Some(self.span(start, end)); + } + } + + if let Some(start) = self.consume_pos('.') { + if self.consume('*') { + // Resolve `CountIsNextParam`. + // We can do this immediately as `position` is resolved later. + let i = self.curarg; + self.curarg += 1; + spec.precision = CountIsStar(i); + } else { + spec.precision = self.count(start + 1); + } + let end = self.current_pos(); + spec.precision_span = Some(self.span(start, end)); + } + + let ty_span_start = self.current_pos(); + // Optional radix followed by the actual format specifier + if self.consume('x') { + if self.consume('?') { + spec.debug_hex = Some(DebugHex::Lower); + spec.ty = "?"; + } else { + spec.ty = "x"; + } + } else if self.consume('X') { + if self.consume('?') { + spec.debug_hex = Some(DebugHex::Upper); + spec.ty = "?"; + } else { + spec.ty = "X"; + } + } else if self.consume('?') { + spec.ty = "?"; + } else { + spec.ty = self.word(); + if !spec.ty.is_empty() { + let ty_span_end = self.current_pos(); + spec.ty_span = Some(self.span(ty_span_start, ty_span_end)); + } + } + spec + } + + /// Parses an inline assembly template modifier at the current position, returning the modifier + /// in the `ty` field of the `FormatSpec` struct. + fn inline_asm(&mut self) -> FormatSpec<'a> { + let mut spec = FormatSpec { + fill: None, + fill_span: None, + align: AlignUnknown, + sign: None, + alternate: false, + zero_pad: false, + debug_hex: None, + precision: CountImplied, + precision_span: None, + width: CountImplied, + width_span: None, + ty: &self.input[..0], + ty_span: None, + }; + if !self.consume(':') { + return spec; + } + + let ty_span_start = self.current_pos(); + spec.ty = self.word(); + if !spec.ty.is_empty() { + let ty_span_end = self.current_pos(); + spec.ty_span = Some(self.span(ty_span_start, ty_span_end)); + } + + spec + } + + /// Parses a `Count` parameter at the current position. This does not check + /// for 'CountIsNextParam' because that is only used in precision, not + /// width. + fn count(&mut self, start: usize) -> Count<'a> { + if let Some(i) = self.integer() { + if self.consume('$') { + CountIsParam(i) + } else { + CountIs(i) + } + } else { + let tmp = self.cur.clone(); + let word = self.word(); + if word.is_empty() { + self.cur = tmp; + CountImplied + } else if let Some(end) = self.consume_pos('$') { + let name_span = self.span(start, end); + CountIsName(word, name_span) + } else { + self.cur = tmp; + CountImplied + } + } + } + + /// Parses a word starting at the current position. A word is the same as + /// Rust identifier, except that it can't start with `_` character. + fn word(&mut self) -> &'a str { + let start = match self.cur.peek() { + Some(&(pos, c)) if rustc_lexer::is_id_start(c) => { + self.cur.next(); + pos + } + _ => { + return ""; + } + }; + let mut end = None; + while let Some(&(pos, c)) = self.cur.peek() { + if rustc_lexer::is_id_continue(c) { + self.cur.next(); + } else { + end = Some(pos); + break; + } + } + let end = end.unwrap_or(self.input.len()); + let word = &self.input[start..end]; + if word == "_" { + self.err_with_note( + "invalid argument name `_`", + "invalid argument name", + "argument name cannot be a single underscore", + self.span(start, end), + ); + } + word + } + + fn integer(&mut self) -> Option { + let mut cur: usize = 0; + let mut found = false; + let mut overflow = false; + let start = self.current_pos(); + while let Some(&(_, c)) = self.cur.peek() { + if let Some(i) = c.to_digit(10) { + let (tmp, mul_overflow) = cur.overflowing_mul(10); + let (tmp, add_overflow) = tmp.overflowing_add(i as usize); + if mul_overflow || add_overflow { + overflow = true; + } + cur = tmp; + found = true; + self.cur.next(); + } else { + break; + } + } + + if overflow { + let end = self.current_pos(); + let overflowed_int = &self.input[start..end]; + self.err( + format!( + "integer `{}` does not fit into the type `usize` whose range is `0..={}`", + overflowed_int, + usize::MAX + ), + "integer out of range for `usize`", + self.span(start, end), + ); + } + + found.then_some(cur) + } + + fn suggest_format(&mut self) { + if let (Some(pos), Some(_)) = (self.consume_pos('?'), self.consume_pos(':')) { + let word = self.word(); + let _end = self.current_pos(); + let pos = self.to_span_index(pos); + self.errors.insert( + 0, + ParseError { + description: "expected format parameter to occur after `:`".to_owned(), + note: Some(format!("`?` comes after `:`, try `{}:{}` instead", word, "?")), + label: "expected `?` to occur after `:`".to_owned(), + span: pos.to(pos), + secondary_label: None, + should_be_replaced_with_positional_argument: false, + }, + ); + } + } + + fn suggest_positional_arg_instead_of_captured_arg(&mut self, arg: Argument<'a>) { + if let Some(end) = self.consume_pos('.') { + let byte_pos = self.to_span_index(end); + let start = InnerOffset(byte_pos.0 + 1); + let field = self.argument(start); + // We can only parse `foo.bar` field access, any deeper nesting, + // or another type of expression, like method calls, are not supported + if !self.consume('}') { + return; + } + if let ArgumentNamed(_) = arg.position { + if let ArgumentNamed(_) = field.position { + self.errors.insert( + 0, + ParseError { + description: "field access isn't supported".to_string(), + note: None, + label: "not supported".to_string(), + span: InnerSpan::new(arg.position_span.start, field.position_span.end), + secondary_label: None, + should_be_replaced_with_positional_argument: true, + }, + ); + } + } + } + } +} + +/// Finds the indices of all characters that have been processed and differ between the actual +/// written code (code snippet) and the `InternedString` that gets processed in the `Parser` +/// in order to properly synthesise the intra-string `Span`s for error diagnostics. +fn find_width_map_from_snippet( + input: &str, + snippet: Option, + str_style: Option, +) -> InputStringKind { + let snippet = match snippet { + Some(ref s) if s.starts_with('"') || s.starts_with("r\"") || s.starts_with("r#") => s, + _ => return InputStringKind::NotALiteral, + }; + + if str_style.is_some() { + return InputStringKind::Literal { width_mappings: Vec::new() }; + } + + // Strip quotes. + let snippet = &snippet[1..snippet.len() - 1]; + + // Macros like `println` add a newline at the end. That technically doesn't make them "literals" anymore, but it's fine + // since we will never need to point our spans there, so we lie about it here by ignoring it. + // Since there might actually be newlines in the source code, we need to normalize away all trailing newlines. + // If we only trimmed it off the input, `format!("\n")` would cause a mismatch as here we they actually match up. + // Alternatively, we could just count the trailing newlines and only trim one from the input if they don't match up. + let input_no_nl = input.trim_end_matches('\n'); + let Some(unescaped) = unescape_string(snippet) else { + return InputStringKind::NotALiteral; + }; + + let unescaped_no_nl = unescaped.trim_end_matches('\n'); + + if unescaped_no_nl != input_no_nl { + // The source string that we're pointing at isn't our input, so spans pointing at it will be incorrect. + // This can for example happen with proc macros that respan generated literals. + return InputStringKind::NotALiteral; + } + + let mut s = snippet.char_indices(); + let mut width_mappings = vec![]; + while let Some((pos, c)) = s.next() { + match (c, s.clone().next()) { + // skip whitespace and empty lines ending in '\\' + ('\\', Some((_, '\n'))) => { + let _ = s.next(); + let mut width = 2; + + while let Some((_, c)) = s.clone().next() { + if matches!(c, ' ' | '\n' | '\t') { + width += 1; + let _ = s.next(); + } else { + break; + } + } + + width_mappings.push(InnerWidthMapping::new(pos, width, 0)); + } + ('\\', Some((_, 'n' | 't' | 'r' | '0' | '\\' | '\'' | '\"'))) => { + width_mappings.push(InnerWidthMapping::new(pos, 2, 1)); + let _ = s.next(); + } + ('\\', Some((_, 'x'))) => { + // consume `\xAB` literal + s.nth(2); + width_mappings.push(InnerWidthMapping::new(pos, 4, 1)); + } + ('\\', Some((_, 'u'))) => { + let mut width = 2; + let _ = s.next(); + + if let Some((_, next_c)) = s.next() { + if next_c == '{' { + // consume up to 6 hexanumeric chars + let digits_len = + s.clone().take(6).take_while(|(_, c)| c.is_digit(16)).count(); + + let len_utf8 = s + .as_str() + .get(..digits_len) + .and_then(|digits| u32::from_str_radix(digits, 16).ok()) + .and_then(char::from_u32) + .map_or(1, char::len_utf8); + + // Skip the digits, for chars that encode to more than 1 utf-8 byte + // exclude as many digits as it is greater than 1 byte + // + // So for a 3 byte character, exclude 2 digits + let required_skips = digits_len.saturating_sub(len_utf8.saturating_sub(1)); + + // skip '{' and '}' also + width += required_skips + 2; + + s.nth(digits_len); + } else if next_c.is_digit(16) { + width += 1; + + // We suggest adding `{` and `}` when appropriate, accept it here as if + // it were correct + let mut i = 0; // consume up to 6 hexanumeric chars + while let (Some((_, c)), _) = (s.next(), i < 6) { + if c.is_digit(16) { + width += 1; + } else { + break; + } + i += 1; + } + } + } + + width_mappings.push(InnerWidthMapping::new(pos, width, 1)); + } + _ => {} + } + } + + InputStringKind::Literal { width_mappings } +} + +fn unescape_string(string: &str) -> Option { + let mut buf = string::String::new(); + let mut ok = true; + unescape::unescape_literal(string, unescape::Mode::Str, &mut |_, unescaped_char| { + match unescaped_char { + Ok(c) => buf.push(c), + Err(_) => ok = false, + } + }); + + ok.then_some(buf) +} diff --git a/crates/hir-expand/src/name.rs b/crates/hir-expand/src/name.rs index 7c179c0cf9..5395b867c9 100644 --- a/crates/hir-expand/src/name.rs +++ b/crates/hir-expand/src/name.rs @@ -54,6 +54,12 @@ impl Name { Name(Repr::Text(text)) } + // FIXME: See above, unfortunately some places really need this right now + #[doc(hidden)] + pub const fn new_text_dont_use(text: SmolStr) -> Name { + Name(Repr::Text(text)) + } + pub fn new_tuple_field(idx: usize) -> Name { Name(Repr::TupleField(idx)) } diff --git a/crates/hir-ty/src/infer/closure.rs b/crates/hir-ty/src/infer/closure.rs index 1f040393f1..9431599ac7 100644 --- a/crates/hir-ty/src/infer/closure.rs +++ b/crates/hir-ty/src/infer/closure.rs @@ -9,7 +9,10 @@ use chalk_ir::{ }; use hir_def::{ data::adt::VariantData, - hir::{Array, BinaryOp, BindingId, CaptureBy, Expr, ExprId, Pat, PatId, Statement, UnaryOp}, + hir::{ + format_args::FormatArgumentKind, Array, BinaryOp, BindingId, CaptureBy, Expr, ExprId, Pat, + PatId, Statement, UnaryOp, + }, lang_item::LangItem, resolver::{resolver_for_expr, ResolveValueResult, ValueNs}, DefWithBodyId, FieldId, HasModule, VariantId, @@ -453,6 +456,14 @@ impl InferenceContext<'_> { fn walk_expr_without_adjust(&mut self, tgt_expr: ExprId) { match &self.body[tgt_expr] { Expr::OffsetOf(_) => (), + Expr::FormatArgs(fa) => { + self.walk_expr_without_adjust(fa.template_expr); + fa.arguments + .arguments + .iter() + .filter(|it| !matches!(it.kind, FormatArgumentKind::Captured(_))) + .for_each(|it| self.walk_expr_without_adjust(it.expr)); + } Expr::InlineAsm(e) => self.walk_expr_without_adjust(e.e), Expr::If { condition, then_branch, else_branch } => { self.consume_expr(*condition); diff --git a/crates/hir-ty/src/infer/expr.rs b/crates/hir-ty/src/infer/expr.rs index 555a9fae48..06742f7b36 100644 --- a/crates/hir-ty/src/infer/expr.rs +++ b/crates/hir-ty/src/infer/expr.rs @@ -9,7 +9,8 @@ use chalk_ir::{cast::Cast, fold::Shift, DebruijnIndex, Mutability, TyVariableKin use hir_def::{ generics::TypeOrConstParamData, hir::{ - ArithOp, Array, BinaryOp, ClosureKind, Expr, ExprId, LabelId, Literal, Statement, UnaryOp, + format_args::FormatArgumentKind, ArithOp, Array, BinaryOp, ClosureKind, Expr, ExprId, + LabelId, Literal, Statement, UnaryOp, }, lang_item::{LangItem, LangItemTarget}, path::{GenericArg, GenericArgs}, @@ -848,6 +849,25 @@ impl InferenceContext<'_> { self.infer_expr_no_expect(it.e); self.result.standard_types.unit.clone() } + Expr::FormatArgs(fa) => { + fa.arguments + .arguments + .iter() + .filter(|it| !matches!(it.kind, FormatArgumentKind::Captured(_))) + .for_each(|it| _ = self.infer_expr_no_expect(it.expr)); + + match self + .resolve_lang_item(LangItem::FormatArguments) + .and_then(|it| it.as_struct()) + { + Some(s) => { + // NOTE: This struct has a lifetime parameter, but we don't currently emit + // those to chalk + TyKind::Adt(AdtId(s.into()), Substitution::empty(Interner)).intern(Interner) + } + None => self.err_ty(), + } + } }; // use a new type variable if we got unknown here let ty = self.insert_type_vars_shallow(ty); diff --git a/crates/hir-ty/src/infer/mutability.rs b/crates/hir-ty/src/infer/mutability.rs index b8a1af96fb..763f4ed2f9 100644 --- a/crates/hir-ty/src/infer/mutability.rs +++ b/crates/hir-ty/src/infer/mutability.rs @@ -3,7 +3,10 @@ use chalk_ir::Mutability; use hir_def::{ - hir::{Array, BinaryOp, BindingAnnotation, Expr, ExprId, PatId, Statement, UnaryOp}, + hir::{ + format_args::FormatArgumentKind, Array, BinaryOp, BindingAnnotation, Expr, ExprId, PatId, + Statement, UnaryOp, + }, lang_item::LangItem, }; use hir_expand::name; @@ -37,6 +40,13 @@ impl InferenceContext<'_> { Expr::Missing => (), Expr::InlineAsm(e) => self.infer_mut_expr_without_adjust(e.e, Mutability::Not), Expr::OffsetOf(_) => (), + Expr::FormatArgs(fa) => { + fa.arguments + .arguments + .iter() + .filter(|it| !matches!(it.kind, FormatArgumentKind::Captured(_))) + .for_each(|arg| self.infer_mut_expr_without_adjust(arg.expr, Mutability::Not)); + } &Expr::If { condition, then_branch, else_branch } => { self.infer_mut_expr(condition, Mutability::Not); self.infer_mut_expr(then_branch, Mutability::Not); diff --git a/crates/hir-ty/src/mir/lower.rs b/crates/hir-ty/src/mir/lower.rs index b6408cea50..9cc98684bf 100644 --- a/crates/hir-ty/src/mir/lower.rs +++ b/crates/hir-ty/src/mir/lower.rs @@ -376,6 +376,9 @@ impl<'ctx> MirLowerCtx<'ctx> { Expr::InlineAsm(_) => { not_supported!("builtin#asm") } + Expr::FormatArgs(_) => { + not_supported!("builtin#format_args") + } Expr::Missing => { if let DefWithBodyId::FunctionId(f) = self.owner { let assoc = f.lookup(self.db.upcast()); diff --git a/crates/hir-ty/src/tests/simple.rs b/crates/hir-ty/src/tests/simple.rs index e7cb7cd417..0c73370a68 100644 --- a/crates/hir-ty/src/tests/simple.rs +++ b/crates/hir-ty/src/tests/simple.rs @@ -3612,3 +3612,25 @@ fn main() { "#, ); } + +#[test] +fn builtin_format_args() { + check_infer( + r#" +#[lang = "format_arguments"] +pub struct Arguments<'a>; +fn main() { + let are = "are"; + builtin#format_args("hello {} friends, we {are} {0}{last}", "fancy", last = "!"); +} +"#, + expect![[r#" + 65..175 '{ ...!"); }': () + 75..78 'are': &str + 81..86 '"are"': &str + 92..172 'builti...= "!")': Arguments<'_> + 152..159 '"fancy"': &str + 168..171 '"!"': &str + "#]], + ); +} diff --git a/crates/parser/src/grammar/expressions/atom.rs b/crates/parser/src/grammar/expressions/atom.rs index e13284d1b7..4197f248e0 100644 --- a/crates/parser/src/grammar/expressions/atom.rs +++ b/crates/parser/src/grammar/expressions/atom.rs @@ -219,7 +219,7 @@ fn tuple_expr(p: &mut Parser<'_>) -> CompletedMarker { // test builtin_expr // fn foo() { // builtin#asm(0); -// builtin#format_args(0); +// builtin#format_args("", 0, 1, a = 2 + 3, a + b); // builtin#offset_of(Foo, bar.baz.0); // } fn builtin_expr(p: &mut Parser<'_>) -> Option { @@ -249,6 +249,24 @@ fn builtin_expr(p: &mut Parser<'_>) -> Option { p.bump_remap(T![format_args]); p.expect(T!['(']); expr(p); + if p.eat(T![,]) { + while !p.at(EOF) && !p.at(T![')']) { + let m = p.start(); + if p.at(IDENT) && p.nth_at(1, T![=]) { + name(p); + p.bump(T![=]); + } + if expr(p).is_none() { + m.abandon(p); + break; + } + m.complete(p, FORMAT_ARGS_ARG); + + if !p.at(T![')']) { + p.expect(T![,]); + } + } + } p.expect(T![')']); Some(m.complete(p, FORMAT_ARGS_EXPR)) } else if p.at_contextual_kw(T![asm]) { diff --git a/crates/parser/src/syntax_kind/generated.rs b/crates/parser/src/syntax_kind/generated.rs index 3e31e4628b..db5278f89d 100644 --- a/crates/parser/src/syntax_kind/generated.rs +++ b/crates/parser/src/syntax_kind/generated.rs @@ -210,6 +210,7 @@ pub enum SyntaxKind { OFFSET_OF_EXPR, ASM_EXPR, FORMAT_ARGS_EXPR, + FORMAT_ARGS_ARG, CALL_EXPR, INDEX_EXPR, METHOD_CALL_EXPR, diff --git a/crates/parser/test_data/parser/inline/ok/0207_builtin_expr.rs b/crates/parser/test_data/parser/inline/ok/0207_builtin_expr.rs index dbad0a91df..14431b0210 100644 --- a/crates/parser/test_data/parser/inline/ok/0207_builtin_expr.rs +++ b/crates/parser/test_data/parser/inline/ok/0207_builtin_expr.rs @@ -1,5 +1,5 @@ fn foo() { builtin#asm(0); - builtin#format_args(0); + builtin#format_args("", 0, 1, a = 2 + 3, a + b); builtin#offset_of(Foo, bar.baz.0); } diff --git a/crates/syntax/rust.ungram b/crates/syntax/rust.ungram index 2ce609b97a..3603560d35 100644 --- a/crates/syntax/rust.ungram +++ b/crates/syntax/rust.ungram @@ -382,7 +382,13 @@ AsmExpr = Attr* 'builtin' '#' 'asm' '(' Expr ')' FormatArgsExpr = - Attr* 'builtin' '#' 'format_args' '(' ')' + Attr* 'builtin' '#' 'format_args' '(' + template:Expr + (',' args:(FormatArgsArg (',' FormatArgsArg)* ','?)? )? + ')' + +FormatArgsArg = + (Name '=')? Expr MacroExpr = MacroCall diff --git a/crates/syntax/src/ast/generated/nodes.rs b/crates/syntax/src/ast/generated/nodes.rs index 1c5e2282ec..7ba0d4dc65 100644 --- a/crates/syntax/src/ast/generated/nodes.rs +++ b/crates/syntax/src/ast/generated/nodes.rs @@ -931,6 +931,9 @@ impl FormatArgsExpr { support::token(&self.syntax, T![format_args]) } pub fn l_paren_token(&self) -> Option { support::token(&self.syntax, T!['(']) } + pub fn template(&self) -> Option { support::child(&self.syntax) } + pub fn comma_token(&self) -> Option { support::token(&self.syntax, T![,]) } + pub fn args(&self) -> AstChildren { support::children(&self.syntax) } pub fn r_paren_token(&self) -> Option { support::token(&self.syntax, T![')']) } } @@ -1163,6 +1166,16 @@ impl UnderscoreExpr { pub fn underscore_token(&self) -> Option { support::token(&self.syntax, T![_]) } } +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct FormatArgsArg { + pub(crate) syntax: SyntaxNode, +} +impl ast::HasName for FormatArgsArg {} +impl FormatArgsArg { + pub fn eq_token(&self) -> Option { support::token(&self.syntax, T![=]) } + pub fn expr(&self) -> Option { support::child(&self.syntax) } +} + #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct StmtList { pub(crate) syntax: SyntaxNode, @@ -2855,6 +2868,17 @@ impl AstNode for UnderscoreExpr { } fn syntax(&self) -> &SyntaxNode { &self.syntax } } +impl AstNode for FormatArgsArg { + fn can_cast(kind: SyntaxKind) -> bool { kind == FORMAT_ARGS_ARG } + fn cast(syntax: SyntaxNode) -> Option { + if Self::can_cast(syntax.kind()) { + Some(Self { syntax }) + } else { + None + } + } + fn syntax(&self) -> &SyntaxNode { &self.syntax } +} impl AstNode for StmtList { fn can_cast(kind: SyntaxKind) -> bool { kind == STMT_LIST } fn cast(syntax: SyntaxNode) -> Option { @@ -4254,6 +4278,7 @@ impl AstNode for AnyHasName { | VARIANT | CONST_PARAM | TYPE_PARAM + | FORMAT_ARGS_ARG | IDENT_PAT ) } @@ -4860,6 +4885,11 @@ impl std::fmt::Display for UnderscoreExpr { std::fmt::Display::fmt(self.syntax(), f) } } +impl std::fmt::Display for FormatArgsArg { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + std::fmt::Display::fmt(self.syntax(), f) + } +} impl std::fmt::Display for StmtList { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { std::fmt::Display::fmt(self.syntax(), f) diff --git a/crates/syntax/src/tests/ast_src.rs b/crates/syntax/src/tests/ast_src.rs index 2c1d832d1e..341bda892b 100644 --- a/crates/syntax/src/tests/ast_src.rs +++ b/crates/syntax/src/tests/ast_src.rs @@ -169,6 +169,7 @@ pub(crate) const KINDS_SRC: KindsSrc<'_> = KindsSrc { "OFFSET_OF_EXPR", "ASM_EXPR", "FORMAT_ARGS_EXPR", + "FORMAT_ARGS_ARG", // postfix "CALL_EXPR", "INDEX_EXPR",